This is an extremely rich data set with features that are not anonymized, so it makes an excellent playground for exploratory data analysis.
Training Data
library(data.table)
library(tidyverse)
library(lubridate)
library(scales)
library(corrplot)
library(DT)
library(daml)
library(plotly)
library(GGally)
library(knitr)
# Read the train/test CSVs with data.table::fread (fast reader), then coerce
# to plain data.frames; stringsAsFactors=TRUE turns character columns into
# factors on read.
train <- as.data.frame(fread('house_price_train.csv', stringsAsFactors=TRUE))
test <- as.data.frame(fread('house_price_test.csv', stringsAsFactors=TRUE))
Some variables appear to be numeric but are actually factors, e.g. MSSubClass.
# MSSubClass is stored as an integer even though it encodes dwelling classes
str(train["MSSubClass"])
## 'data.frame': 1460 obs. of 1 variable:
## $ MSSubClass: int 60 20 60 70 60 50 20 60 50 190 ...
# Recode MSSubClass as a categorical variable in both splits
train$MSSubClass <- as.factor(train$MSSubClass)
test$MSSubClass <- as.factor(test$MSSubClass)
# Confirm the conversion: MSSubClass is now a factor with discrete levels
str(train["MSSubClass"])
## 'data.frame': 1460 obs. of 1 variable:
## $ MSSubClass: Factor w/ 15 levels "20","30","40",..: 6 1 6 7 6 5 1 6 5 15 ...
There are features that show us year of construction/reconstruction.
# SalePrice against the year the house was built (interactive via plotly)
p <- ggplot(train, aes(x = YearBuilt, y = SalePrice)) +
  geom_point(color = 'blue')
ggplotly(p)
# Same relationship for the year the garage was built
p <- ggplot(train, aes(x = GarageYrBlt, y = SalePrice)) +
  geom_point(color = 'blue')
ggplotly(p)
The Id variable should be converted to character; let's also check the correlation between YearBuilt and GarageYrBlt.
# Inspect every year-related column plus the Id column
str(select(train, contains("year"), contains("yr"), Id))
## 'data.frame': 1460 obs. of 5 variables:
## $ YearBuilt : int 2003 1976 2001 1915 2000 1993 2004 1973 1931 1939 ...
## $ YearRemodAdd: int 2003 1976 2002 1970 2000 1995 2005 1973 1950 1950 ...
## $ GarageYrBlt : int 2003 1976 2001 1998 2000 1993 2004 1973 1931 1939 ...
## $ YrSold : int 2008 2007 2008 2006 2008 2009 2007 2009 2008 2008 ...
## $ Id : int 1 2 3 4 5 6 7 8 9 10 ...
# Id is a row identifier, not a quantity — store it as character in both splits
train$Id <- as.character(train$Id)
test$Id <- as.character(test$Id)
# YearBuilt and GarageYrBlt track each other closely (garages are usually
# built with the house); complete.obs drops rows with missing GarageYrBlt
with(train, cor(YearBuilt, GarageYrBlt, use = "complete.obs"))
## [1] 0.8256675
How much data is missing?
# Percent of missing values per column, keeping only columns with any NAs
miss_pct <- map_dbl(train, ~ round(mean(is.na(.x)) * 100, 1))
miss_pct <- miss_pct[miss_pct > 0]
missing_df <- data.frame(miss = miss_pct, var = names(miss_pct), row.names = NULL)
# Bar chart of missingness, features ordered from most to least missing
p <- ggplot(missing_df, aes(x = reorder(var, -miss), y = miss)) +
  geom_col(fill = 'red') +
  labs(x = '', y = '% missing', title = 'Percent missing data by feature') +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggplotly(p)
names(train)
## [1] "Id" "MSSubClass" "MSZoning" "LotFrontage"
## [5] "LotArea" "Street" "Alley" "LotShape"
## [9] "LandContour" "Utilities" "LotConfig" "LandSlope"
## [13] "Neighborhood" "Condition1" "Condition2" "BldgType"
## [17] "HouseStyle" "OverallQual" "OverallCond" "YearBuilt"
## [21] "YearRemodAdd" "RoofStyle" "RoofMatl" "Exterior1st"
## [25] "Exterior2nd" "MasVnrType" "MasVnrArea" "ExterQual"
## [29] "ExterCond" "Foundation" "BsmtQual" "BsmtCond"
## [33] "BsmtExposure" "BsmtFinType1" "BsmtFinSF1" "BsmtFinType2"
## [37] "BsmtFinSF2" "BsmtUnfSF" "TotalBsmtSF" "Heating"
## [41] "HeatingQC" "CentralAir" "Electrical" "1stFlrSF"
## [45] "2ndFlrSF" "LowQualFinSF" "GrLivArea" "BsmtFullBath"
## [49] "BsmtHalfBath" "FullBath" "HalfBath" "BedroomAbvGr"
## [53] "KitchenAbvGr" "KitchenQual" "TotRmsAbvGrd" "Functional"
## [57] "Fireplaces" "FireplaceQu" "GarageType" "GarageYrBlt"
## [61] "GarageFinish" "GarageCars" "GarageArea" "GarageQual"
## [65] "GarageCond" "PavedDrive" "WoodDeckSF" "OpenPorchSF"
## [69] "EnclosedPorch" "3SsnPorch" "ScreenPorch" "PoolArea"
## [73] "PoolQC" "Fence" "MiscFeature" "MiscVal"
## [77] "MoSold" "YrSold" "SaleType" "SaleCondition"
## [81] "SalePrice"
Lets take a look at features with most missing values.
We have to think about assigning values instead of NAs, or dropping the feature.
# Summaries for the six features with the most missing values
summary(train[, c("PoolQC", "MiscFeature", "Alley", "Fence",
                  "FireplaceQu", "LotFrontage")])
## PoolQC MiscFeature Alley Fence FireplaceQu LotFrontage
## Ex : 2 Gar2: 2 Grvl: 50 GdPrv: 59 Ex : 24 Min. : 21.00
## Fa : 2 Othr: 2 Pave: 41 GdWo : 54 Fa : 33 1st Qu.: 59.00
## Gd : 3 Shed: 49 NA's:1369 MnPrv: 157 Gd :380 Median : 69.00
## NA's:1453 TenC: 1 MnWw : 11 Po : 20 Mean : 70.05
## NA's:1406 NA's :1179 TA :313 3rd Qu.: 80.00
## NA's:690 Max. :313.00
## NA's :259
Features like PoolQC and MiscFeature have a very low number of meaningful values.
We will drop those two, for other features we will replace NA with a value like None.
# Drop the two near-empty features, then replace NA with an explicit "None"
# level in every categorical column. Numeric columns are left untouched
# (their NAs are imputed later in the modelling recipe).
train <- train %>%
  select(-PoolQC, -MiscFeature)
# Names of the categorical (factor/character) columns
cat_columns <- train %>%
  mutate_if(is.factor, as.character) %>%
  select_if(is.character) %>%
  names()
# all_of() makes the external-vector selection explicit: passing a bare
# character vector to select() is deprecated tidyselect behavior and is
# ambiguous if a column ever shares the vector's name.
train_cat <- train %>%
  select(all_of(cat_columns)) %>%
  mutate_if(is.factor, as.character) %>%
  replace(., is.na(.), "None") %>%
  mutate_if(is.character, as.factor)
train_num <- train %>%
  select(-all_of(cat_columns))
# Recombine; note this places numeric columns before categorical ones
train <- bind_cols(train_num, train_cat)
# Apply the identical treatment to the test split (same cat_columns vector,
# so train and test stay aligned)
test <- test %>%
  select(-PoolQC, -MiscFeature)
test_cat <- test %>%
  select(all_of(cat_columns)) %>%
  mutate_if(is.factor, as.character) %>%
  replace(., is.na(.), "None") %>%
  mutate_if(is.character, as.factor)
test_num <- test %>%
  select(-all_of(cat_columns))
test <- bind_cols(test_num, test_cat)
# Re-check missingness after the "None" replacement — only numeric NAs remain
miss_pct <- map_dbl(train, ~ round(mean(is.na(.x)) * 100, 1))
miss_pct <- miss_pct[miss_pct > 0]
remaining_df <- data.frame(miss = miss_pct, var = names(miss_pct), row.names = NULL)
p <- ggplot(remaining_df, aes(x = reorder(var, -miss), y = miss)) +
  geom_col(fill = 'red') +
  labs(x = '', y = '% missing', title = 'Percent missing data by feature') +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggplotly(p)
Lets check a histogram for our target - SalePrice:
# Density-scaled histogram of the raw target with a density overlay
p <- ggplot(train, aes(x = SalePrice)) +
  geom_histogram(aes(y = ..density..), fill = 'red', bins = 50, color = "black") +
  geom_density(aes(y = ..density..), color = "blue", size = 0.5) +
  ggtitle('SalePrice Histogram')
ggplotly(p)
Target isn’t normally distributed, log transformation solves the problem:
# Same histogram on the log scale — the target becomes roughly normal
p <- ggplot(train, aes(x = log(SalePrice))) +
  geom_histogram(aes(y = ..density..), fill = 'red', bins = 50, color = "black") +
  geom_density(aes(y = ..density..), color = "blue", size = 0.5) +
  ggtitle('Log SalePrice Histogram')
ggplotly(p)
Lets take a look at the correlation matrix for numeric predictors
# Collect the names of all numeric columns (includes the target SalePrice)
num_columns <- names(select_if(train, is.numeric))
print(num_columns)
## [1] "LotFrontage" "LotArea" "OverallQual" "OverallCond"
## [5] "YearBuilt" "YearRemodAdd" "MasVnrArea" "BsmtFinSF1"
## [9] "BsmtFinSF2" "BsmtUnfSF" "TotalBsmtSF" "1stFlrSF"
## [13] "2ndFlrSF" "LowQualFinSF" "GrLivArea" "BsmtFullBath"
## [17] "BsmtHalfBath" "FullBath" "HalfBath" "BedroomAbvGr"
## [21] "KitchenAbvGr" "TotRmsAbvGrd" "Fireplaces" "GarageYrBlt"
## [25] "GarageCars" "GarageArea" "WoodDeckSF" "OpenPorchSF"
## [29] "EnclosedPorch" "3SsnPorch" "ScreenPorch" "PoolArea"
## [33] "MiscVal" "MoSold" "YrSold" "SalePrice"
# Pairwise correlations among numeric columns (complete cases only),
# then rank every predictor by absolute correlation with SalePrice
cor_matrix <- cor(train[, num_columns], use = "complete.obs")
cor_list <- cor_matrix %>%
  as.data.frame() %>%
  rownames_to_column("Feature") %>%
  select(Feature, SalePrice) %>%
  filter(Feature != "SalePrice") %>%
  arrange(desc(abs(SalePrice)))
cor_list
## Feature SalePrice
## 1 OverallQual 0.797880680
## 2 GrLivArea 0.705153567
## 3 GarageCars 0.647033611
## 4 GarageArea 0.619329622
## 5 TotalBsmtSF 0.615612237
## 6 1stFlrSF 0.607969106
## 7 FullBath 0.566627442
## 8 TotRmsAbvGrd 0.547067360
## 9 YearBuilt 0.525393598
## 10 YearRemodAdd 0.521253270
## 11 GarageYrBlt 0.504753018
## 12 MasVnrArea 0.488658155
## 13 Fireplaces 0.461872689
## 14 BsmtFinSF1 0.390300523
## 15 LotFrontage 0.344269772
## 16 OpenPorchSF 0.343353812
## 17 WoodDeckSF 0.336855121
## 18 2ndFlrSF 0.306879002
## 19 LotArea 0.299962206
## 20 HalfBath 0.268560303
## 21 BsmtFullBath 0.236737407
## 22 BsmtUnfSF 0.213128680
## 23 BedroomAbvGr 0.166813894
## 24 EnclosedPorch -0.154843204
## 25 KitchenAbvGr -0.140497445
## 26 OverallCond -0.124391232
## 27 ScreenPorch 0.110426815
## 28 PoolArea 0.092488120
## 29 MoSold 0.051568064
## 30 BsmtHalfBath -0.036512665
## 31 MiscVal -0.036041237
## 32 3SsnPorch 0.030776594
## 33 BsmtFinSF2 -0.028021366
## 34 YrSold -0.011868823
## 35 LowQualFinSF -0.001481983
Lets plot top 10 features in terms of absolute value of correlation with SalePrice
# cor_list is already sorted by |correlation|, so take the first 10 rows
# explicitly. The original top_n(10) ranked by the *signed* value of the
# last column (and is superseded), which only coincidentally matched the
# intended "top 10 by absolute correlation" because the top rows happen to
# be positive.
cor_10 <- cor_list %>%
  slice_head(n = 10) %>%
  pull(Feature)
# Correlation matrix restricted to the top-10 features plus the target
cormatrix_10 <- cor(train[, c(cor_10, "SalePrice")], use = "complete.obs")
corrplot::corrplot(cormatrix_10, method = "color",
                   type = "upper", order = "hclust", number.cex = .7,
                   addCoef.col = "black",          # print correlation coefficients
                   tl.col = "black", tl.srt = 90,  # label color and rotation
                   sig.level = 0.05, insig = "blank",
                   diag = FALSE)  # hide the trivial 1.0 diagonal
OverallQual and GrLivArea are strongly correlated with SalePrice
There are twin features that are correlated to each other:
GarageCars and GarageArea have an obvious relation - if we have a bigger garage, more cars fit.
TotalBsmtSF and 1stFlrSF represent the square footage of basement and first floor respectively. Values are logically very close to each other.
GrLivArea and TotRmsAbvGrd - more space generally means more rooms.
We can tackle correlation issues by excluding correlated variables in our machine learning pipeline with recipe setting correlation threshold to 0.8
Correlation matrix shows that GrLivArea is highly correlated with SalePrice as one might expect
# GrLivArea vs SalePrice with a linear trend line
p <- ggplot(train, aes(x = GrLivArea, y = SalePrice)) +
  geom_point(color = 'red') +
  geom_smooth(method = 'lm', formula = y ~ x)
ggplotly(p)
There are a couple of records that seem like outliers, with GrLivArea higher than 4500 SF.
There are also 2 records in the top right, but although they sit away from the group, they seem to follow the general pattern.
# Remove the two extreme-GrLivArea outliers identified above
train <- subset(train, GrLivArea < 4500)
GarageArea & SalePrice:
# GarageArea vs SalePrice with a linear trend line
p <- ggplot(train, aes(x = GarageArea, y = SalePrice)) +
  geom_point(color = 'orange') +
  geom_smooth(method = 'lm', formula = y ~ x)
ggplotly(p)
TotalBsmtSF & SalePrice:
# TotalBsmtSF vs SalePrice with a linear trend line
p <- ggplot(train, aes(x = TotalBsmtSF, y = SalePrice)) +
  geom_point(color = 'violet') +
  geom_smooth(method = 'lm', formula = y ~ x)
ggplotly(p)
Distribution of GarageCars:
# Distribution of garage capacity. The original title said
# 'Distribution of room count' — a copy-paste error for a GarageCars plot.
p <- ggplot(train, aes(x = GarageCars)) +
  geom_histogram(fill = 'red', bins = 5) +
  ggtitle('Distribution of garage capacity (GarageCars)')
ggplotly(p)
There are only 5 properties with 4 GarageCars, so let's group them with the ones that have 3, rename the level to 3+, and change the data type to factor.
Also we will update vector of numeric and categorical columns.
# Collapse the rare "4" level into "3+" and convert GarageCars to a factor
recode_cars <- function(x) {
  x <- as.character(x)
  x[x %in% c("3", "4")] <- "3+"
  factor(x)
}
train$GarageCars <- recode_cars(train$GarageCars)
test$GarageCars <- recode_cars(test$GarageCars)
str(train$GarageCars)
# GarageCars is now categorical — move it between the bookkeeping vectors
num_columns <- setdiff(num_columns, "GarageCars")
cat_columns <- c(cat_columns, "GarageCars")
GarageCars & SalePrice:
# SalePrice distribution per garage-capacity level
p <- ggplot(train, aes(x = GarageCars, y = SalePrice, fill = GarageCars)) +
  geom_boxplot()
ggplotly(p)
We remove GarageCars from our list of correlated numeric variables and create a scatterplot matrix:
# GarageCars is no longer numeric, so drop it from the correlated-feature list
cor_10 <- setdiff(cor_10, "GarageCars")
# Scatterplot matrix of the five strongest numeric predictors plus the target
pair_cols <- c(cor_10[1:5], "SalePrice")
p <- ggpairs(train[, pair_cols])
ggplotly(p)
Lets now check variable importance for numeric features using a random forest model:
# Fit one random forest on the numeric columns only, purely to rank
# numeric features by importance (tunelen = 1 -> a single mtry value).
data <- train[,c(num_columns)]
# recipes pipeline: log the outcome, kNN-impute numeric predictors, normalize.
# recipe()/step_* come from the recipes package; step_knnimpute is the older
# name of step_impute_knn — presumably pinned by the daml package, confirm.
recipe <- recipe(data) %>%
update_role(everything(), new_role = "predictor") %>%
update_role(SalePrice, new_role = "outcome") %>%
step_log(all_outcomes()) %>%
step_knnimpute(all_predictors(),-all_nominal()) %>%
step_normalize(all_predictors())
# daml_train is a project-specific wrapper (daml package) around caret-style
# CV tuning; "local" tracking logs the run on disk rather than to mlflow.
model <- daml_train(data, recipe,
model = "rf",
tracking = "local",
run_name = "varimp",
tunelen = 1)
## determining metric
## determining tuning method
## tuning the model
## Preparing recipe
## + Fold1: mtry=5
## - Fold1: mtry=5
## + Fold2: mtry=5
## - Fold2: mtry=5
## + Fold3: mtry=5
## - Fold3: mtry=5
## + Fold4: mtry=5
## - Fold4: mtry=5
## + Fold5: mtry=5
## - Fold5: mtry=5
## Aggregating results
## Fitting final model on full training set
## track experiment locally
## generating output
varImp(model)
## rf variable importance
##
## only 20 most important variables shown (out of 34)
##
## Overall
## GrLivArea 100.00
## OverallQual 91.78
## TotalBsmtSF 87.15
## BsmtFinSF1 87.01
## YearBuilt 85.26
## LotArea 82.11
## 1stFlrSF 80.48
## OverallCond 79.18
## GarageArea 69.62
## YearRemodAdd 67.87
## Fireplaces 67.44
## 2ndFlrSF 66.91
## LotFrontage 66.50
## GarageYrBlt 64.73
## BsmtUnfSF 61.74
## TotRmsAbvGrd 59.59
## FullBath 58.29
## BedroomAbvGr 55.35
## HalfBath 48.47
## MasVnrArea 48.11
Lets take a look a categorical features and check for imbalances there:
summary(train[,cat_columns])
## Id MSSubClass MSZoning Street Alley LotShape
## 1 : 1 20 :536 C (all): 10 Grvl: 6 Grvl: 50 IR1:483
## 10 : 1 60 :297 FV : 65 Pave:1452 None:1367 IR2: 41
## 100 : 1 50 :144 RH : 16 Pave: 41 IR3: 9
## 1000 : 1 120 : 87 RL :1149 Reg:925
## 1001 : 1 30 : 69 RM : 218
## 1002 : 1 160 : 63
## (Other):1452 (Other):262
## LandContour Utilities LotConfig LandSlope Neighborhood
## Bnk: 61 AllPub:1457 Corner : 262 Gtl:1380 NAmes :225
## HLS: 50 NoSeWa: 1 CulDSac: 94 Mod: 65 CollgCr:150
## Low: 36 FR2 : 47 Sev: 13 OldTown:113
## Lvl:1311 FR3 : 4 Edwards: 98
## Inside :1051 Somerst: 86
## Gilbert: 79
## (Other):707
## Condition1 Condition2 BldgType HouseStyle RoofStyle
## Norm :1260 Norm :1444 1Fam :1218 1Story :726 Flat : 13
## Feedr : 80 Feedr : 6 2fmCon: 31 2Story :443 Gable :1141
## Artery : 48 Artery : 2 Duplex: 52 1.5Fin :154 Gambrel: 11
## RRAn : 26 RRNn : 2 Twnhs : 43 SLvl : 65 Hip : 284
## PosN : 18 PosA : 1 TwnhsE: 114 SFoyer : 37 Mansard: 7
## RRAe : 11 PosN : 1 1.5Unf : 14 Shed : 2
## (Other): 15 (Other): 2 (Other): 19
## RoofMatl Exterior1st Exterior2nd MasVnrType ExterQual ExterCond
## CompShg:1433 VinylSd:515 VinylSd:504 BrkCmn : 15 Ex: 50 Ex: 3
## Tar&Grv: 11 HdBoard:222 MetalSd:214 BrkFace:445 Fa: 14 Fa: 28
## WdShngl: 6 MetalSd:220 HdBoard:207 None :872 Gd:488 Gd: 146
## WdShake: 5 Wd Sdng:206 Wd Sdng:197 Stone :126 TA:906 Po: 1
## Membran: 1 Plywood:108 Plywood:142 TA:1280
## Metal : 1 CemntBd: 60 CmentBd: 59
## (Other): 1 (Other):127 (Other):135
## Foundation BsmtQual BsmtCond BsmtExposure BsmtFinType1 BsmtFinType2
## BrkTil:146 Ex :119 Fa : 45 Av :221 ALQ :220 ALQ : 19
## CBlock:634 Fa : 35 Gd : 65 Gd :132 BLQ :148 BLQ : 33
## PConc :645 Gd :618 None: 37 Mn :114 GLQ :416 GLQ : 14
## Slab : 24 None: 37 Po : 2 No :953 LwQ : 74 LwQ : 46
## Stone : 6 TA :649 TA :1309 None: 38 None: 37 None: 38
## Wood : 3 Rec :133 Rec : 54
## Unf :430 Unf :1254
## Heating HeatingQC CentralAir Electrical KitchenQual Functional
## Floor: 1 Ex:739 N: 95 FuseA: 94 Ex: 98 Maj1: 14
## GasA :1426 Fa: 49 Y:1363 FuseF: 27 Fa: 39 Maj2: 5
## GasW : 18 Gd:241 FuseP: 3 Gd:586 Min1: 31
## Grav : 7 Po: 1 Mix : 1 TA:735 Min2: 34
## OthW : 2 TA:428 None : 1 Mod : 15
## Wall : 4 SBrkr:1332 Sev : 1
## Typ :1358
## FireplaceQu GarageType GarageFinish GarageQual GarageCond PavedDrive
## Ex : 24 2Types : 6 Fin :350 Ex : 3 Ex : 2 N: 90
## Fa : 33 Attchd :869 None: 81 Fa : 48 Fa : 35 P: 30
## Gd :378 Basment: 19 RFn :422 Gd : 14 Gd : 9 Y:1338
## None:690 BuiltIn: 87 Unf :605 None: 81 None: 81
## Po : 20 CarPort: 9 Po : 3 Po : 7
## TA :313 Detchd :387 TA :1309 TA :1324
## None : 81
## Fence SaleType SaleCondition GarageCars
## GdPrv: 59 WD :1267 Abnorml: 101 0 : 81
## GdWo : 54 New : 120 AdjLand: 4 1 :369
## MnPrv: 157 COD : 43 Alloca : 12 2 :823
## MnWw : 11 ConLD : 9 Family : 20 3+:185
## None :1177 ConLI : 5 Normal :1198
## ConLw : 5 Partial: 123
## (Other): 9
Features like Street, Utilities, Condition2, RoofMatl, Heating are highly imbalanced.
Other features also might have low frequency levels, but we can fix it in our recipe later.
For now lets drop the above mentioned features from the dataset
# Remove the heavily imbalanced categorical features from both splits and
# from the bookkeeping vector of categorical column names
drop_cols <- c("Street", "Utilities", "Condition2", "RoofMatl", "Heating")
train <- train[, !names(train) %in% drop_cols]
test <- test[, !names(test) %in% drop_cols]
cat_columns <- setdiff(cat_columns, drop_cols)
profile_missing(train)
## feature num_missing pct_missing
## 1 LotFrontage 259 0.177640604
## 2 LotArea 0 0.000000000
## 3 OverallQual 0 0.000000000
## 4 OverallCond 0 0.000000000
## 5 YearBuilt 0 0.000000000
## 6 YearRemodAdd 0 0.000000000
## 7 MasVnrArea 8 0.005486968
## 8 BsmtFinSF1 0 0.000000000
## 9 BsmtFinSF2 0 0.000000000
## 10 BsmtUnfSF 0 0.000000000
## 11 TotalBsmtSF 0 0.000000000
## 12 1stFlrSF 0 0.000000000
## 13 2ndFlrSF 0 0.000000000
## 14 LowQualFinSF 0 0.000000000
## 15 GrLivArea 0 0.000000000
## 16 BsmtFullBath 0 0.000000000
## 17 BsmtHalfBath 0 0.000000000
## 18 FullBath 0 0.000000000
## 19 HalfBath 0 0.000000000
## 20 BedroomAbvGr 0 0.000000000
## 21 KitchenAbvGr 0 0.000000000
## 22 TotRmsAbvGrd 0 0.000000000
## 23 Fireplaces 0 0.000000000
## 24 GarageYrBlt 81 0.055555556
## 25 GarageCars 0 0.000000000
## 26 GarageArea 0 0.000000000
## 27 WoodDeckSF 0 0.000000000
## 28 OpenPorchSF 0 0.000000000
## 29 EnclosedPorch 0 0.000000000
## 30 3SsnPorch 0 0.000000000
## 31 ScreenPorch 0 0.000000000
## 32 PoolArea 0 0.000000000
## 33 MiscVal 0 0.000000000
## 34 MoSold 0 0.000000000
## 35 YrSold 0 0.000000000
## 36 SalePrice 0 0.000000000
## 37 Id 0 0.000000000
## 38 MSSubClass 0 0.000000000
## 39 MSZoning 0 0.000000000
## 40 Alley 0 0.000000000
## 41 LotShape 0 0.000000000
## 42 LandContour 0 0.000000000
## 43 LotConfig 0 0.000000000
## 44 LandSlope 0 0.000000000
## 45 Neighborhood 0 0.000000000
## 46 Condition1 0 0.000000000
## 47 BldgType 0 0.000000000
## 48 HouseStyle 0 0.000000000
## 49 RoofStyle 0 0.000000000
## 50 Exterior1st 0 0.000000000
## 51 Exterior2nd 0 0.000000000
## 52 MasVnrType 0 0.000000000
## 53 ExterQual 0 0.000000000
## 54 ExterCond 0 0.000000000
## 55 Foundation 0 0.000000000
## 56 BsmtQual 0 0.000000000
## 57 BsmtCond 0 0.000000000
## 58 BsmtExposure 0 0.000000000
## 59 BsmtFinType1 0 0.000000000
## 60 BsmtFinType2 0 0.000000000
## 61 HeatingQC 0 0.000000000
## 62 CentralAir 0 0.000000000
## 63 Electrical 0 0.000000000
## 64 KitchenQual 0 0.000000000
## 65 Functional 0 0.000000000
## 66 FireplaceQu 0 0.000000000
## 67 GarageType 0 0.000000000
## 68 GarageFinish 0 0.000000000
## 69 GarageQual 0 0.000000000
## 70 GarageCond 0 0.000000000
## 71 PavedDrive 0 0.000000000
## 72 Fence 0 0.000000000
## 73 SaleType 0 0.000000000
## 74 SaleCondition 0 0.000000000
Lets take a look at all features and their importance.
First we will do target encoding for categorical features and see if they will be good for modelling.
# Recipe over ALL features: Id is excluded from modelling via the ID role.
recipe <- recipe(train) %>%
update_role(everything(), new_role = "predictor") %>%
update_role(SalePrice, new_role = "outcome") %>%
update_role(Id, new_role = "ID variable") %>%
# log-transform the target (matches the earlier distribution analysis)
step_log(all_outcomes()) %>%
# pool categorical levels rarer than 1% into an "other" level
step_other(all_predictors(), -all_numeric(), threshold = 0.01) %>%
# guard against unseen factor levels at predict time
step_novel(all_predictors(),-all_numeric(), new_level = "new") %>%
# target-encode categoricals with a mixed-effects model (embed package)
step_lencode_mixed(all_predictors(), -all_numeric(), outcome = vars(SalePrice)) %>%
step_knnimpute(all_predictors()) %>%
# step_corr without threshold uses its default cutoff (0.9), not the 0.8
# mentioned in the text — the 0.8 threshold is applied in the later recipe
step_corr(all_predictors()) %>%
step_normalize(all_predictors())
# Single-mtry fit, again only to inspect variable importance
model <- daml_train(train, recipe,
model = "rf",
tracking = "local",
run_name = "varimp",
tunelen = 1)
## determining metric
## determining tuning method
## tuning the model
## Preparing recipe
## + Fold1: mtry=8
## - Fold1: mtry=8
## + Fold2: mtry=8
## - Fold2: mtry=8
## + Fold3: mtry=8
## - Fold3: mtry=8
## + Fold4: mtry=8
## - Fold4: mtry=8
## + Fold5: mtry=8
## model fit failed for Fold5: mtry=8 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold5: mtry=8
## Aggregating results
## Fitting final model on full training set
## track experiment locally
## generating output
varImp(model)
## rf variable importance
##
## only 20 most important variables shown (out of 70)
##
## Overall
## TotalBsmtSF 100.00
## GrLivArea 99.19
## BsmtFinSF1 89.65
## Neighborhood 89.63
## LotArea 89.49
## 1stFlrSF 88.77
## OverallQual 82.15
## 2ndFlrSF 81.89
## GarageArea 71.88
## FireplaceQu 69.17
## MSSubClass 66.06
## TotRmsAbvGrd 65.39
## YearRemodAdd 61.41
## YearBuilt 58.09
## Fireplaces 57.81
## GarageCars 57.56
## OverallCond 56.65
## LotFrontage 56.48
## KitchenQual 53.13
## GarageType 52.81
Lets take a look at the Neighborhood and average prices of property per area.
Location is one of the most important price determinants, together with the house size and the quality of materials.
summary(train$Neighborhood)
## Blmngtn Blueste BrDale BrkSide ClearCr CollgCr Crawfor Edwards Gilbert IDOTRR
## 17 2 16 58 28 150 51 98 79 37
## MeadowV Mitchel NAmes NoRidge NPkVill NridgHt NWAmes OldTown Sawyer SawyerW
## 17 49 225 41 9 77 73 113 74 59
## Somerst StoneBr SWISU Timber Veenker
## 86 25 25 38 11
# SalePrice distribution per neighborhood (static plot)
ggplot(train, aes(Neighborhood, SalePrice, fill = Neighborhood)) +
  geom_boxplot()
OverallQual is one of the most important features for this model
# OverallQual is numeric, so group explicitly to get one box per level
p <- ggplot(train, aes(x = OverallQual, y = SalePrice,
                       group = OverallQual, fill = OverallQual)) +
  geom_boxplot()
ggplotly(p)
Another interesting feature that determines the location is MSZoning.
# SalePrice distribution per zoning class
p <- ggplot(train, aes(x = MSZoning, y = SalePrice,
                       group = MSZoning, fill = MSZoning)) +
  geom_boxplot()
ggplotly(p)
Another interesting feature that determines the location is MSSubClass.
# SalePrice distribution per dwelling class
p <- ggplot(train, aes(x = MSSubClass, y = SalePrice,
                       group = MSSubClass, fill = MSSubClass)) +
  geom_boxplot()
ggplotly(p)
# Fix the RNG so CV folds / subsampling are reproducible
set.seed(123)
# Same preprocessing as before, but with the correlation filter explicitly
# set to the 0.8 threshold discussed earlier
recipe <- recipe(train) %>%
update_role(everything(), new_role = "predictor") %>%
update_role(SalePrice, new_role = "outcome") %>%
update_role(Id, new_role = "ID variable") %>%
step_log(all_outcomes()) %>%
step_other(all_predictors(), -all_numeric(), threshold = 0.01) %>%
step_novel(all_predictors(),-all_numeric(), new_level = "new") %>%
step_lencode_mixed(all_predictors(), -all_numeric(), outcome = vars(SalePrice)) %>%
step_knnimpute(all_predictors()) %>%
step_corr(all_predictors(), threshold = 0.8) %>%
step_normalize(all_predictors())
# Full tuned fit: varimp-based feature selection capped at 24 features,
# selection done on a 40% subsample, 5 tuning candidates, results logged
# to a local mlflow server under the "house pricing" experiment.
rf <- daml_train(train, recipe,
model = "rf",
feature_selection = "varimp",
max_features = 24,
p_subset = 0.4,
tracking = "mlflow",
tracking_uri = "http://localhost:5000",
mlflow_experiment = "house pricing",
tunelen = 5)
## determining metric
## determining tuning method
## varimp feature selection
## Preparing recipe
## + Fold1: mtry= 2
## - Fold1: mtry= 2
## + Fold1: mtry=17
## - Fold1: mtry=17
## + Fold1: mtry=33
## - Fold1: mtry=33
## + Fold1: mtry=48
## - Fold1: mtry=48
## + Fold1: mtry=64
## - Fold1: mtry=64
## + Fold2: mtry= 2
## - Fold2: mtry= 2
## + Fold2: mtry=17
## - Fold2: mtry=17
## + Fold2: mtry=33
## - Fold2: mtry=33
## + Fold2: mtry=48
## - Fold2: mtry=48
## + Fold2: mtry=64
## - Fold2: mtry=64
## + Fold3: mtry= 2
## model fit failed for Fold3: mtry= 2 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold3: mtry= 2
## + Fold3: mtry=17
## model fit failed for Fold3: mtry=17 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold3: mtry=17
## + Fold3: mtry=33
## model fit failed for Fold3: mtry=33 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold3: mtry=33
## + Fold3: mtry=48
## model fit failed for Fold3: mtry=48 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold3: mtry=48
## + Fold3: mtry=64
## model fit failed for Fold3: mtry=64 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold3: mtry=64
## + Fold4: mtry= 2
## model fit failed for Fold4: mtry= 2 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold4: mtry= 2
## + Fold4: mtry=17
## model fit failed for Fold4: mtry=17 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold4: mtry=17
## + Fold4: mtry=33
## model fit failed for Fold4: mtry=33 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold4: mtry=33
## + Fold4: mtry=48
## model fit failed for Fold4: mtry=48 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold4: mtry=48
## + Fold4: mtry=64
## model fit failed for Fold4: mtry=64 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold4: mtry=64
## + Fold5: mtry= 2
## model fit failed for Fold5: mtry= 2 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold5: mtry= 2
## + Fold5: mtry=17
## model fit failed for Fold5: mtry=17 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold5: mtry=17
## + Fold5: mtry=33
## model fit failed for Fold5: mtry=33 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold5: mtry=33
## + Fold5: mtry=48
## model fit failed for Fold5: mtry=48 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold5: mtry=48
## + Fold5: mtry=64
## model fit failed for Fold5: mtry=64 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold5: mtry=64
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 33 on full training set
## selected features [varimp]:
## [1] "OverallQual" "1stFlrSF" "Neighborhood" "MSSubClass" "2ndFlrSF"
## [6] "GarageArea" "BsmtFinSF1" "TotRmsAbvGrd" "LotArea" "FullBath"
## [11] "Fireplaces" "GarageType" "YearRemodAdd" "OpenPorchSF" "KitchenQual"
## [16] "GarageYrBlt" "ExterQual" "LotFrontage" "BedroomAbvGr" "BsmtFinType1"
## [21] "BsmtQual" "MSZoning" "HalfBath" "Exterior1st"
## tuning the model
## Preparing recipe
## + Fold1: mtry= 2
## model fit failed for Fold1: mtry= 2 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold1: mtry= 2
## + Fold1: mtry=17
## model fit failed for Fold1: mtry=17 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold1: mtry=17
## + Fold1: mtry=33
## model fit failed for Fold1: mtry=33 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold1: mtry=33
## + Fold1: mtry=48
## model fit failed for Fold1: mtry=48 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold1: mtry=48
## + Fold1: mtry=64
## model fit failed for Fold1: mtry=64 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold1: mtry=64
## + Fold2: mtry= 2
## - Fold2: mtry= 2
## + Fold2: mtry=17
## - Fold2: mtry=17
## + Fold2: mtry=33
## - Fold2: mtry=33
## + Fold2: mtry=48
## - Fold2: mtry=48
## + Fold2: mtry=64
## - Fold2: mtry=64
## + Fold3: mtry= 2
## - Fold3: mtry= 2
## + Fold3: mtry=17
## - Fold3: mtry=17
## + Fold3: mtry=33
## - Fold3: mtry=33
## + Fold3: mtry=48
## - Fold3: mtry=48
## + Fold3: mtry=64
## - Fold3: mtry=64
## + Fold4: mtry= 2
## - Fold4: mtry= 2
## + Fold4: mtry=17
## - Fold4: mtry=17
## + Fold4: mtry=33
## - Fold4: mtry=33
## + Fold4: mtry=48
## - Fold4: mtry=48
## + Fold4: mtry=64
## - Fold4: mtry=64
## + Fold5: mtry= 2
## model fit failed for Fold5: mtry= 2 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold5: mtry= 2
## + Fold5: mtry=17
## model fit failed for Fold5: mtry=17 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold5: mtry=17
## + Fold5: mtry=33
## model fit failed for Fold5: mtry=33 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold5: mtry=33
## + Fold5: mtry=48
## model fit failed for Fold5: mtry=48 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold5: mtry=48
## + Fold5: mtry=64
## model fit failed for Fold5: mtry=64 Error in randomForest.default(x, y, mtry = param$mtry, ...) :
## NA not permitted in predictors
##
## - Fold5: mtry=64
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 17 on full training set
## track experiment with mlflow
## generating output
varImp(rf)
## rf variable importance
##
## only 20 most important variables shown (out of 64)
##
## Overall
## 1stFlrSF 100.00
## 2ndFlrSF 91.77
## OverallQual 87.96
## Neighborhood 85.38
## LotArea 83.15
## BsmtFinSF1 82.03
## TotRmsAbvGrd 74.21
## GarageArea 67.28
## MSSubClass 62.03
## Fireplaces 60.99
## OverallCond 53.69
## FullBath 53.40
## ExterQual 46.36
## YearRemodAdd 45.67
## MasVnrArea 44.76
## HalfBath 44.32
## KitchenQual 44.07
## OpenPorchSF 43.34
## GarageType 42.61
## BsmtUnfSF 42.53
Now lets try our first submission!
# Build the Kaggle submission: predict on the test split with the fitted rf,
# keep Id plus the prediction, and invert the log transform applied to the
# outcome in the recipe.
submission <- test %>%
  daml_predict(model = rf, pred_field = "SalePrice") %>%
  select(Id, SalePrice) %>%
  mutate(SalePrice = exp(SalePrice))
# row.names = FALSE (never the reassignable shorthand F) keeps the two-column
# Id,SalePrice format Kaggle expects
write.csv(submission, "rf_baseline.csv", row.names = FALSE)
Our submission scored 0.14183 on a leaderboard!
Now let's try gbm instead of random forest.
# Same pipeline as the rf run, reusing the recipe defined above, but with a
# gradient-boosting model and a larger tuning grid (tunelen = 10)
gbm <- daml_train(train, recipe,
model = "gbm",
feature_selection = "varimp",
max_features = 24,
p_subset = 0.4,
tracking = "mlflow",
tracking_uri = "http://localhost:5000",
mlflow_experiment = "house pricing",
tunelen = 10)
## determining metric
## determining tuning method
## varimp feature selection
## Preparing recipe
## + Fold1: shrinkage=0.1, interaction.depth=1, n.minobsinnode=10, n.trees=250
## model fit failed for Fold1: shrinkage=0.1, interaction.depth=1, n.minobsinnode=10, n.trees=250 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:PoolArea
##
## - Fold1: shrinkage=0.1, interaction.depth=1, n.minobsinnode=10, n.trees=250
## + Fold1: shrinkage=0.1, interaction.depth=2, n.minobsinnode=10, n.trees=250
## model fit failed for Fold1: shrinkage=0.1, interaction.depth=2, n.minobsinnode=10, n.trees=250 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:PoolArea
##
## - Fold1: shrinkage=0.1, interaction.depth=2, n.minobsinnode=10, n.trees=250
## + Fold1: shrinkage=0.1, interaction.depth=3, n.minobsinnode=10, n.trees=250
## model fit failed for Fold1: shrinkage=0.1, interaction.depth=3, n.minobsinnode=10, n.trees=250 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:PoolArea
##
## - Fold1: shrinkage=0.1, interaction.depth=3, n.minobsinnode=10, n.trees=250
## + Fold1: shrinkage=0.1, interaction.depth=4, n.minobsinnode=10, n.trees=250
## model fit failed for Fold1: shrinkage=0.1, interaction.depth=4, n.minobsinnode=10, n.trees=250 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:PoolArea
##
## - Fold1: shrinkage=0.1, interaction.depth=4, n.minobsinnode=10, n.trees=250
## + Fold1: shrinkage=0.1, interaction.depth=5, n.minobsinnode=10, n.trees=250
## model fit failed for Fold1: shrinkage=0.1, interaction.depth=5, n.minobsinnode=10, n.trees=250 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:PoolArea
##
## - Fold1: shrinkage=0.1, interaction.depth=5, n.minobsinnode=10, n.trees=250
## + Fold2: shrinkage=0.1, interaction.depth=1, n.minobsinnode=10, n.trees=250
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1342 nan 0.1000 0.0121
## 2 0.1224 nan 0.1000 0.0118
## 3 0.1132 nan 0.1000 0.0085
## 4 0.1051 nan 0.1000 0.0083
## 5 0.0980 nan 0.1000 0.0080
## 6 0.0909 nan 0.1000 0.0055
## 7 0.0851 nan 0.1000 0.0055
## 8 0.0799 nan 0.1000 0.0054
## 9 0.0753 nan 0.1000 0.0041
## 10 0.0713 nan 0.1000 0.0035
## 20 0.0449 nan 0.1000 0.0016
## 40 0.0239 nan 0.1000 0.0006
## 60 0.0164 nan 0.1000 0.0002
## 80 0.0131 nan 0.1000 0.0000
## 100 0.0115 nan 0.1000 -0.0000
## 120 0.0106 nan 0.1000 -0.0000
## 140 0.0099 nan 0.1000 -0.0000
## 160 0.0094 nan 0.1000 -0.0000
## 180 0.0090 nan 0.1000 -0.0000
## 200 0.0086 nan 0.1000 -0.0000
## 220 0.0084 nan 0.1000 0.0000
## 240 0.0081 nan 0.1000 -0.0000
## 250 0.0079 nan 0.1000 -0.0000
##
## - Fold2: shrinkage=0.1, interaction.depth=1, n.minobsinnode=10, n.trees=250
## + Fold2: shrinkage=0.1, interaction.depth=2, n.minobsinnode=10, n.trees=250
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1310 nan 0.1000 0.0154
## 2 0.1167 nan 0.1000 0.0136
## 3 0.1055 nan 0.1000 0.0102
## 4 0.0956 nan 0.1000 0.0103
## 5 0.0871 nan 0.1000 0.0080
## 6 0.0802 nan 0.1000 0.0068
## 7 0.0735 nan 0.1000 0.0063
## 8 0.0670 nan 0.1000 0.0066
## 9 0.0620 nan 0.1000 0.0037
## 10 0.0577 nan 0.1000 0.0037
## 20 0.0304 nan 0.1000 0.0016
## 40 0.0148 nan 0.1000 0.0001
## 60 0.0106 nan 0.1000 -0.0000
## 80 0.0090 nan 0.1000 -0.0000
## 100 0.0081 nan 0.1000 -0.0001
## 120 0.0074 nan 0.1000 0.0000
## 140 0.0068 nan 0.1000 -0.0000
## 160 0.0063 nan 0.1000 -0.0000
## 180 0.0058 nan 0.1000 -0.0000
## 200 0.0055 nan 0.1000 -0.0000
## 220 0.0051 nan 0.1000 -0.0000
## 240 0.0048 nan 0.1000 -0.0000
## 250 0.0046 nan 0.1000 -0.0000
##
## - Fold2: shrinkage=0.1, interaction.depth=2, n.minobsinnode=10, n.trees=250
## + Fold2: shrinkage=0.1, interaction.depth=3, n.minobsinnode=10, n.trees=250
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1291 nan 0.1000 0.0190
## 2 0.1141 nan 0.1000 0.0135
## 3 0.1008 nan 0.1000 0.0129
## 4 0.0899 nan 0.1000 0.0100
## 5 0.0797 nan 0.1000 0.0087
## 6 0.0717 nan 0.1000 0.0074
## 7 0.0643 nan 0.1000 0.0071
## 8 0.0583 nan 0.1000 0.0048
## 9 0.0532 nan 0.1000 0.0046
## 10 0.0486 nan 0.1000 0.0045
## 20 0.0237 nan 0.1000 0.0013
## 40 0.0116 nan 0.1000 0.0001
## 60 0.0087 nan 0.1000 -0.0000
## 80 0.0073 nan 0.1000 -0.0001
## 100 0.0063 nan 0.1000 -0.0000
## 120 0.0056 nan 0.1000 -0.0001
## 140 0.0051 nan 0.1000 -0.0000
## 160 0.0046 nan 0.1000 -0.0000
## 180 0.0042 nan 0.1000 -0.0000
## 200 0.0039 nan 0.1000 -0.0000
## 220 0.0036 nan 0.1000 -0.0000
## 240 0.0033 nan 0.1000 -0.0000
## 250 0.0032 nan 0.1000 -0.0000
##
## - Fold2: shrinkage=0.1, interaction.depth=3, n.minobsinnode=10, n.trees=250
## + Fold2: shrinkage=0.1, interaction.depth=4, n.minobsinnode=10, n.trees=250
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1290 nan 0.1000 0.0200
## 2 0.1110 nan 0.1000 0.0151
## 3 0.0972 nan 0.1000 0.0136
## 4 0.0856 nan 0.1000 0.0101
## 5 0.0754 nan 0.1000 0.0082
## 6 0.0667 nan 0.1000 0.0079
## 7 0.0593 nan 0.1000 0.0066
## 8 0.0532 nan 0.1000 0.0046
## 9 0.0483 nan 0.1000 0.0048
## 10 0.0432 nan 0.1000 0.0043
## 20 0.0194 nan 0.1000 0.0008
## 40 0.0096 nan 0.1000 0.0000
## 60 0.0073 nan 0.1000 -0.0000
## 80 0.0059 nan 0.1000 0.0000
## 100 0.0050 nan 0.1000 -0.0000
## 120 0.0042 nan 0.1000 -0.0000
## 140 0.0037 nan 0.1000 -0.0000
## 160 0.0032 nan 0.1000 -0.0000
## 180 0.0028 nan 0.1000 -0.0000
## 200 0.0025 nan 0.1000 -0.0000
## 220 0.0022 nan 0.1000 -0.0000
## 240 0.0020 nan 0.1000 -0.0000
## 250 0.0019 nan 0.1000 -0.0000
##
## - Fold2: shrinkage=0.1, interaction.depth=4, n.minobsinnode=10, n.trees=250
## + Fold2: shrinkage=0.1, interaction.depth=5, n.minobsinnode=10, n.trees=250
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1269 nan 0.1000 0.0218
## 2 0.1103 nan 0.1000 0.0134
## 3 0.0968 nan 0.1000 0.0127
## 4 0.0844 nan 0.1000 0.0104
## 5 0.0747 nan 0.1000 0.0096
## 6 0.0658 nan 0.1000 0.0072
## 7 0.0587 nan 0.1000 0.0065
## 8 0.0529 nan 0.1000 0.0052
## 9 0.0478 nan 0.1000 0.0041
## 10 0.0427 nan 0.1000 0.0037
## 20 0.0189 nan 0.1000 0.0009
## 40 0.0093 nan 0.1000 0.0001
## 60 0.0071 nan 0.1000 -0.0001
## 80 0.0056 nan 0.1000 -0.0001
## 100 0.0045 nan 0.1000 -0.0001
## 120 0.0038 nan 0.1000 -0.0000
## 140 0.0032 nan 0.1000 -0.0000
## 160 0.0027 nan 0.1000 -0.0000
## 180 0.0023 nan 0.1000 -0.0000
## 200 0.0020 nan 0.1000 -0.0000
## 220 0.0017 nan 0.1000 -0.0000
## 240 0.0015 nan 0.1000 -0.0000
## 250 0.0014 nan 0.1000 -0.0000
##
## - Fold2: shrinkage=0.1, interaction.depth=5, n.minobsinnode=10, n.trees=250
## + Fold3: shrinkage=0.1, interaction.depth=1, n.minobsinnode=10, n.trees=250
## model fit failed for Fold3: shrinkage=0.1, interaction.depth=1, n.minobsinnode=10, n.trees=250 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold3: shrinkage=0.1, interaction.depth=1, n.minobsinnode=10, n.trees=250
## + Fold3: shrinkage=0.1, interaction.depth=2, n.minobsinnode=10, n.trees=250
## model fit failed for Fold3: shrinkage=0.1, interaction.depth=2, n.minobsinnode=10, n.trees=250 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold3: shrinkage=0.1, interaction.depth=2, n.minobsinnode=10, n.trees=250
## + Fold3: shrinkage=0.1, interaction.depth=3, n.minobsinnode=10, n.trees=250
## model fit failed for Fold3: shrinkage=0.1, interaction.depth=3, n.minobsinnode=10, n.trees=250 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold3: shrinkage=0.1, interaction.depth=3, n.minobsinnode=10, n.trees=250
## + Fold3: shrinkage=0.1, interaction.depth=4, n.minobsinnode=10, n.trees=250
## model fit failed for Fold3: shrinkage=0.1, interaction.depth=4, n.minobsinnode=10, n.trees=250 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold3: shrinkage=0.1, interaction.depth=4, n.minobsinnode=10, n.trees=250
## + Fold3: shrinkage=0.1, interaction.depth=5, n.minobsinnode=10, n.trees=250
## model fit failed for Fold3: shrinkage=0.1, interaction.depth=5, n.minobsinnode=10, n.trees=250 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold3: shrinkage=0.1, interaction.depth=5, n.minobsinnode=10, n.trees=250
## + Fold4: shrinkage=0.1, interaction.depth=1, n.minobsinnode=10, n.trees=250
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1325 nan 0.1000 0.0116
## 2 0.1210 nan 0.1000 0.0111
## 3 0.1117 nan 0.1000 0.0088
## 4 0.1034 nan 0.1000 0.0083
## 5 0.0959 nan 0.1000 0.0073
## 6 0.0898 nan 0.1000 0.0062
## 7 0.0845 nan 0.1000 0.0044
## 8 0.0795 nan 0.1000 0.0043
## 9 0.0751 nan 0.1000 0.0039
## 10 0.0713 nan 0.1000 0.0041
## 20 0.0465 nan 0.1000 0.0018
## 40 0.0254 nan 0.1000 0.0004
## 60 0.0180 nan 0.1000 0.0002
## 80 0.0147 nan 0.1000 0.0000
## 100 0.0128 nan 0.1000 -0.0000
## 120 0.0115 nan 0.1000 0.0000
## 140 0.0107 nan 0.1000 -0.0000
## 160 0.0100 nan 0.1000 -0.0000
## 180 0.0095 nan 0.1000 -0.0000
## 200 0.0091 nan 0.1000 -0.0000
## 220 0.0087 nan 0.1000 -0.0000
## 240 0.0084 nan 0.1000 -0.0000
## 250 0.0083 nan 0.1000 -0.0000
##
## - Fold4: shrinkage=0.1, interaction.depth=1, n.minobsinnode=10, n.trees=250
## + Fold4: shrinkage=0.1, interaction.depth=2, n.minobsinnode=10, n.trees=250
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1278 nan 0.1000 0.0160
## 2 0.1134 nan 0.1000 0.0133
## 3 0.1013 nan 0.1000 0.0108
## 4 0.0931 nan 0.1000 0.0060
## 5 0.0854 nan 0.1000 0.0067
## 6 0.0786 nan 0.1000 0.0065
## 7 0.0714 nan 0.1000 0.0070
## 8 0.0661 nan 0.1000 0.0049
## 9 0.0615 nan 0.1000 0.0040
## 10 0.0572 nan 0.1000 0.0034
## 20 0.0303 nan 0.1000 0.0012
## 40 0.0147 nan 0.1000 0.0002
## 60 0.0109 nan 0.1000 -0.0000
## 80 0.0089 nan 0.1000 -0.0000
## 100 0.0079 nan 0.1000 -0.0001
## 120 0.0072 nan 0.1000 -0.0001
## 140 0.0066 nan 0.1000 -0.0000
## 160 0.0061 nan 0.1000 -0.0000
## 180 0.0057 nan 0.1000 -0.0000
## 200 0.0053 nan 0.1000 -0.0000
## 220 0.0050 nan 0.1000 -0.0001
## 240 0.0047 nan 0.1000 -0.0000
## 250 0.0046 nan 0.1000 -0.0000
##
## - Fold4: shrinkage=0.1, interaction.depth=2, n.minobsinnode=10, n.trees=250
## + Fold4: shrinkage=0.1, interaction.depth=3, n.minobsinnode=10, n.trees=250
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1280 nan 0.1000 0.0155
## 2 0.1141 nan 0.1000 0.0133
## 3 0.1005 nan 0.1000 0.0127
## 4 0.0895 nan 0.1000 0.0111
## 5 0.0805 nan 0.1000 0.0066
## 6 0.0720 nan 0.1000 0.0076
## 7 0.0651 nan 0.1000 0.0064
## 8 0.0587 nan 0.1000 0.0051
## 9 0.0532 nan 0.1000 0.0050
## 10 0.0493 nan 0.1000 0.0039
## 20 0.0239 nan 0.1000 0.0011
## 40 0.0118 nan 0.1000 0.0000
## 60 0.0086 nan 0.1000 0.0000
## 80 0.0071 nan 0.1000 -0.0000
## 100 0.0061 nan 0.1000 0.0000
## 120 0.0054 nan 0.1000 -0.0000
## 140 0.0048 nan 0.1000 -0.0000
## 160 0.0043 nan 0.1000 -0.0000
## 180 0.0040 nan 0.1000 -0.0000
## 200 0.0036 nan 0.1000 -0.0000
## 220 0.0033 nan 0.1000 -0.0001
## 240 0.0030 nan 0.1000 -0.0000
## 250 0.0029 nan 0.1000 -0.0000
##
## - Fold4: shrinkage=0.1, interaction.depth=3, n.minobsinnode=10, n.trees=250
## + Fold4: shrinkage=0.1, interaction.depth=4, n.minobsinnode=10, n.trees=250
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1261 nan 0.1000 0.0171
## 2 0.1093 nan 0.1000 0.0172
## 3 0.0955 nan 0.1000 0.0142
## 4 0.0844 nan 0.1000 0.0110
## 5 0.0756 nan 0.1000 0.0087
## 6 0.0677 nan 0.1000 0.0070
## 7 0.0603 nan 0.1000 0.0066
## 8 0.0545 nan 0.1000 0.0051
## 9 0.0492 nan 0.1000 0.0053
## 10 0.0448 nan 0.1000 0.0042
## 20 0.0206 nan 0.1000 0.0011
## 40 0.0096 nan 0.1000 0.0001
## 60 0.0070 nan 0.1000 -0.0001
## 80 0.0058 nan 0.1000 -0.0000
## 100 0.0049 nan 0.1000 -0.0000
## 120 0.0042 nan 0.1000 -0.0001
## 140 0.0036 nan 0.1000 -0.0001
## 160 0.0031 nan 0.1000 -0.0000
## 180 0.0028 nan 0.1000 -0.0000
## 200 0.0024 nan 0.1000 -0.0000
## 220 0.0022 nan 0.1000 -0.0000
## 240 0.0020 nan 0.1000 -0.0000
## 250 0.0019 nan 0.1000 -0.0000
##
## - Fold4: shrinkage=0.1, interaction.depth=4, n.minobsinnode=10, n.trees=250
## + Fold4: shrinkage=0.1, interaction.depth=5, n.minobsinnode=10, n.trees=250
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1250 nan 0.1000 0.0167
## 2 0.1068 nan 0.1000 0.0190
## 3 0.0926 nan 0.1000 0.0136
## 4 0.0814 nan 0.1000 0.0108
## 5 0.0716 nan 0.1000 0.0096
## 6 0.0638 nan 0.1000 0.0086
## 7 0.0564 nan 0.1000 0.0054
## 8 0.0501 nan 0.1000 0.0056
## 9 0.0449 nan 0.1000 0.0043
## 10 0.0408 nan 0.1000 0.0038
## 20 0.0180 nan 0.1000 0.0009
## 40 0.0085 nan 0.1000 0.0001
## 60 0.0062 nan 0.1000 -0.0001
## 80 0.0050 nan 0.1000 -0.0001
## 100 0.0041 nan 0.1000 -0.0001
## 120 0.0035 nan 0.1000 -0.0000
## 140 0.0029 nan 0.1000 -0.0000
## 160 0.0025 nan 0.1000 -0.0000
## 180 0.0022 nan 0.1000 -0.0000
## 200 0.0019 nan 0.1000 -0.0000
## 220 0.0017 nan 0.1000 -0.0000
## 240 0.0015 nan 0.1000 -0.0000
## 250 0.0014 nan 0.1000 -0.0000
##
## - Fold4: shrinkage=0.1, interaction.depth=5, n.minobsinnode=10, n.trees=250
## + Fold5: shrinkage=0.1, interaction.depth=1, n.minobsinnode=10, n.trees=250
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1325 nan 0.1000 0.0101
## 2 0.1216 nan 0.1000 0.0108
## 3 0.1115 nan 0.1000 0.0092
## 4 0.1030 nan 0.1000 0.0084
## 5 0.0960 nan 0.1000 0.0062
## 6 0.0896 nan 0.1000 0.0067
## 7 0.0848 nan 0.1000 0.0054
## 8 0.0807 nan 0.1000 0.0045
## 9 0.0763 nan 0.1000 0.0044
## 10 0.0714 nan 0.1000 0.0037
## 20 0.0449 nan 0.1000 0.0015
## 40 0.0246 nan 0.1000 0.0006
## 60 0.0165 nan 0.1000 0.0001
## 80 0.0131 nan 0.1000 -0.0000
## 100 0.0112 nan 0.1000 0.0000
## 120 0.0099 nan 0.1000 -0.0000
## 140 0.0091 nan 0.1000 -0.0000
## 160 0.0086 nan 0.1000 0.0000
## 180 0.0081 nan 0.1000 -0.0000
## 200 0.0077 nan 0.1000 0.0000
## 220 0.0074 nan 0.1000 -0.0000
## 240 0.0070 nan 0.1000 -0.0000
## 250 0.0069 nan 0.1000 -0.0000
##
## - Fold5: shrinkage=0.1, interaction.depth=1, n.minobsinnode=10, n.trees=250
## + Fold5: shrinkage=0.1, interaction.depth=2, n.minobsinnode=10, n.trees=250
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1285 nan 0.1000 0.0147
## 2 0.1145 nan 0.1000 0.0127
## 3 0.1046 nan 0.1000 0.0093
## 4 0.0942 nan 0.1000 0.0091
## 5 0.0862 nan 0.1000 0.0071
## 6 0.0794 nan 0.1000 0.0063
## 7 0.0720 nan 0.1000 0.0070
## 8 0.0659 nan 0.1000 0.0058
## 9 0.0608 nan 0.1000 0.0046
## 10 0.0569 nan 0.1000 0.0032
## 20 0.0299 nan 0.1000 0.0016
## 40 0.0143 nan 0.1000 0.0001
## 60 0.0103 nan 0.1000 -0.0000
## 80 0.0084 nan 0.1000 -0.0000
## 100 0.0074 nan 0.1000 0.0000
## 120 0.0067 nan 0.1000 -0.0000
## 140 0.0061 nan 0.1000 -0.0000
## 160 0.0056 nan 0.1000 -0.0000
## 180 0.0053 nan 0.1000 -0.0000
## 200 0.0049 nan 0.1000 -0.0000
## 220 0.0046 nan 0.1000 -0.0000
## 240 0.0044 nan 0.1000 -0.0000
## 250 0.0042 nan 0.1000 -0.0000
##
## - Fold5: shrinkage=0.1, interaction.depth=2, n.minobsinnode=10, n.trees=250
## + Fold5: shrinkage=0.1, interaction.depth=3, n.minobsinnode=10, n.trees=250
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1248 nan 0.1000 0.0178
## 2 0.1117 nan 0.1000 0.0127
## 3 0.0977 nan 0.1000 0.0122
## 4 0.0872 nan 0.1000 0.0082
## 5 0.0793 nan 0.1000 0.0076
## 6 0.0718 nan 0.1000 0.0049
## 7 0.0654 nan 0.1000 0.0068
## 8 0.0598 nan 0.1000 0.0053
## 9 0.0543 nan 0.1000 0.0052
## 10 0.0495 nan 0.1000 0.0040
## 20 0.0240 nan 0.1000 0.0011
## 40 0.0111 nan 0.1000 0.0001
## 60 0.0081 nan 0.1000 0.0000
## 80 0.0067 nan 0.1000 -0.0000
## 100 0.0058 nan 0.1000 0.0000
## 120 0.0052 nan 0.1000 -0.0000
## 140 0.0046 nan 0.1000 -0.0000
## 160 0.0042 nan 0.1000 -0.0000
## 180 0.0038 nan 0.1000 -0.0000
## 200 0.0035 nan 0.1000 -0.0000
## 220 0.0032 nan 0.1000 -0.0000
## 240 0.0030 nan 0.1000 -0.0000
## 250 0.0029 nan 0.1000 -0.0000
##
## - Fold5: shrinkage=0.1, interaction.depth=3, n.minobsinnode=10, n.trees=250
## + Fold5: shrinkage=0.1, interaction.depth=4, n.minobsinnode=10, n.trees=250
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1241 nan 0.1000 0.0167
## 2 0.1085 nan 0.1000 0.0144
## 3 0.0950 nan 0.1000 0.0119
## 4 0.0836 nan 0.1000 0.0098
## 5 0.0745 nan 0.1000 0.0082
## 6 0.0667 nan 0.1000 0.0074
## 7 0.0596 nan 0.1000 0.0075
## 8 0.0543 nan 0.1000 0.0048
## 9 0.0485 nan 0.1000 0.0050
## 10 0.0433 nan 0.1000 0.0052
## 20 0.0199 nan 0.1000 0.0008
## 40 0.0092 nan 0.1000 -0.0000
## 60 0.0069 nan 0.1000 -0.0000
## 80 0.0055 nan 0.1000 -0.0000
## 100 0.0048 nan 0.1000 -0.0001
## 120 0.0042 nan 0.1000 -0.0000
## 140 0.0037 nan 0.1000 -0.0000
## 160 0.0033 nan 0.1000 -0.0000
## 180 0.0029 nan 0.1000 -0.0000
## 200 0.0026 nan 0.1000 -0.0000
## 220 0.0024 nan 0.1000 -0.0000
## 240 0.0021 nan 0.1000 -0.0000
## 250 0.0020 nan 0.1000 -0.0000
##
## - Fold5: shrinkage=0.1, interaction.depth=4, n.minobsinnode=10, n.trees=250
## + Fold5: shrinkage=0.1, interaction.depth=5, n.minobsinnode=10, n.trees=250
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1232 nan 0.1000 0.0208
## 2 0.1073 nan 0.1000 0.0134
## 3 0.0937 nan 0.1000 0.0116
## 4 0.0825 nan 0.1000 0.0101
## 5 0.0733 nan 0.1000 0.0097
## 6 0.0651 nan 0.1000 0.0066
## 7 0.0577 nan 0.1000 0.0069
## 8 0.0513 nan 0.1000 0.0066
## 9 0.0458 nan 0.1000 0.0050
## 10 0.0418 nan 0.1000 0.0040
## 20 0.0187 nan 0.1000 0.0013
## 40 0.0082 nan 0.1000 0.0001
## 60 0.0058 nan 0.1000 -0.0000
## 80 0.0047 nan 0.1000 -0.0000
## 100 0.0039 nan 0.1000 -0.0000
## 120 0.0032 nan 0.1000 -0.0000
## 140 0.0028 nan 0.1000 -0.0000
## 160 0.0024 nan 0.1000 -0.0000
## 180 0.0021 nan 0.1000 -0.0000
## 200 0.0018 nan 0.1000 -0.0000
## 220 0.0016 nan 0.1000 -0.0000
## 240 0.0014 nan 0.1000 -0.0000
## 250 0.0013 nan 0.1000 -0.0000
##
## - Fold5: shrinkage=0.1, interaction.depth=5, n.minobsinnode=10, n.trees=250
## Aggregating results
## Selecting tuning parameters
## Fitting n.trees = 200, interaction.depth = 3, shrinkage = 0.1, n.minobsinnode = 10 on full training set
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1270 nan 0.1000 0.0159
## 2 0.1122 nan 0.1000 0.0146
## 3 0.0986 nan 0.1000 0.0118
## 4 0.0883 nan 0.1000 0.0095
## 5 0.0795 nan 0.1000 0.0072
## 6 0.0712 nan 0.1000 0.0066
## 7 0.0648 nan 0.1000 0.0063
## 8 0.0593 nan 0.1000 0.0041
## 9 0.0542 nan 0.1000 0.0043
## 10 0.0501 nan 0.1000 0.0038
## 20 0.0242 nan 0.1000 0.0010
## 40 0.0119 nan 0.1000 0.0001
## 60 0.0089 nan 0.1000 -0.0001
## 80 0.0077 nan 0.1000 -0.0000
## 100 0.0067 nan 0.1000 -0.0000
## 120 0.0060 nan 0.1000 -0.0000
## 140 0.0054 nan 0.1000 -0.0000
## 160 0.0050 nan 0.1000 -0.0000
## 180 0.0046 nan 0.1000 -0.0000
## 200 0.0043 nan 0.1000 -0.0000
## selected features [varimp]:
## [1] "OverallQual" "Neighborhood" "1stFlrSF" "GarageArea" "ExterQual"
## [6] "TotRmsAbvGrd" "2ndFlrSF" "KitchenQual" "GarageFinish" "BsmtFinSF1"
## [11] "LotArea" "Fireplaces" "MSSubClass" "YearRemodAdd" "OverallCond"
## [16] "MSZoning" "OpenPorchSF" "CentralAir" "MasVnrArea" "BsmtQual"
## [21] "BsmtUnfSF" "GarageYrBlt" "GarageType" "BedroomAbvGr"
## tuning the model
## Preparing recipe
## + Fold1: shrinkage=0.1, interaction.depth= 1, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1449 nan 0.1000 0.0137
## 2 0.1337 nan 0.1000 0.0107
## 3 0.1244 nan 0.1000 0.0102
## 4 0.1166 nan 0.1000 0.0077
## 5 0.1097 nan 0.1000 0.0067
## 6 0.1038 nan 0.1000 0.0055
## 7 0.0983 nan 0.1000 0.0057
## 8 0.0933 nan 0.1000 0.0047
## 9 0.0889 nan 0.1000 0.0041
## 10 0.0846 nan 0.1000 0.0036
## 20 0.0562 nan 0.1000 0.0017
## 40 0.0338 nan 0.1000 0.0005
## 60 0.0247 nan 0.1000 0.0002
## 80 0.0202 nan 0.1000 0.0000
## 100 0.0176 nan 0.1000 0.0001
## 120 0.0161 nan 0.1000 0.0000
## 140 0.0151 nan 0.1000 -0.0000
## 160 0.0145 nan 0.1000 -0.0000
## 180 0.0140 nan 0.1000 -0.0000
## 200 0.0136 nan 0.1000 -0.0000
## 220 0.0133 nan 0.1000 0.0000
## 240 0.0130 nan 0.1000 -0.0000
## 260 0.0128 nan 0.1000 -0.0000
## 280 0.0126 nan 0.1000 -0.0000
## 300 0.0124 nan 0.1000 -0.0000
## 320 0.0122 nan 0.1000 -0.0000
## 340 0.0121 nan 0.1000 -0.0000
## 360 0.0119 nan 0.1000 -0.0000
## 380 0.0117 nan 0.1000 -0.0001
## 400 0.0116 nan 0.1000 -0.0000
## 420 0.0115 nan 0.1000 -0.0000
## 440 0.0114 nan 0.1000 -0.0000
## 460 0.0112 nan 0.1000 -0.0000
## 480 0.0111 nan 0.1000 -0.0000
## 500 0.0110 nan 0.1000 -0.0000
##
## - Fold1: shrinkage=0.1, interaction.depth= 1, n.minobsinnode=10, n.trees=500
## + Fold1: shrinkage=0.1, interaction.depth= 2, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1423 nan 0.1000 0.0161
## 2 0.1287 nan 0.1000 0.0134
## 3 0.1176 nan 0.1000 0.0102
## 4 0.1067 nan 0.1000 0.0100
## 5 0.0975 nan 0.1000 0.0079
## 6 0.0896 nan 0.1000 0.0078
## 7 0.0825 nan 0.1000 0.0070
## 8 0.0773 nan 0.1000 0.0047
## 9 0.0724 nan 0.1000 0.0043
## 10 0.0677 nan 0.1000 0.0050
## 20 0.0394 nan 0.1000 0.0016
## 40 0.0218 nan 0.1000 0.0002
## 60 0.0163 nan 0.1000 0.0001
## 80 0.0140 nan 0.1000 -0.0000
## 100 0.0129 nan 0.1000 -0.0000
## 120 0.0122 nan 0.1000 -0.0000
## 140 0.0116 nan 0.1000 -0.0000
## 160 0.0111 nan 0.1000 -0.0000
## 180 0.0106 nan 0.1000 -0.0000
## 200 0.0102 nan 0.1000 -0.0000
## 220 0.0098 nan 0.1000 -0.0001
## 240 0.0095 nan 0.1000 -0.0000
## 260 0.0093 nan 0.1000 -0.0000
## 280 0.0090 nan 0.1000 -0.0000
## 300 0.0088 nan 0.1000 -0.0000
## 320 0.0085 nan 0.1000 -0.0000
## 340 0.0083 nan 0.1000 -0.0000
## 360 0.0081 nan 0.1000 -0.0000
## 380 0.0079 nan 0.1000 -0.0000
## 400 0.0077 nan 0.1000 -0.0000
## 420 0.0075 nan 0.1000 -0.0000
## 440 0.0073 nan 0.1000 -0.0000
## 460 0.0071 nan 0.1000 -0.0000
## 480 0.0069 nan 0.1000 -0.0000
## 500 0.0068 nan 0.1000 -0.0000
##
## - Fold1: shrinkage=0.1, interaction.depth= 2, n.minobsinnode=10, n.trees=500
## + Fold1: shrinkage=0.1, interaction.depth= 3, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1388 nan 0.1000 0.0189
## 2 0.1232 nan 0.1000 0.0145
## 3 0.1109 nan 0.1000 0.0116
## 4 0.1001 nan 0.1000 0.0108
## 5 0.0905 nan 0.1000 0.0092
## 6 0.0830 nan 0.1000 0.0073
## 7 0.0752 nan 0.1000 0.0059
## 8 0.0696 nan 0.1000 0.0052
## 9 0.0640 nan 0.1000 0.0051
## 10 0.0592 nan 0.1000 0.0043
## 20 0.0329 nan 0.1000 0.0012
## 40 0.0179 nan 0.1000 0.0002
## 60 0.0140 nan 0.1000 -0.0000
## 80 0.0123 nan 0.1000 0.0000
## 100 0.0112 nan 0.1000 0.0000
## 120 0.0104 nan 0.1000 -0.0000
## 140 0.0096 nan 0.1000 -0.0000
## 160 0.0091 nan 0.1000 -0.0000
## 180 0.0086 nan 0.1000 -0.0000
## 200 0.0081 nan 0.1000 -0.0000
## 220 0.0077 nan 0.1000 -0.0000
## 240 0.0073 nan 0.1000 -0.0000
## 260 0.0070 nan 0.1000 -0.0000
## 280 0.0068 nan 0.1000 -0.0000
## 300 0.0065 nan 0.1000 0.0000
## 320 0.0062 nan 0.1000 -0.0000
## 340 0.0060 nan 0.1000 -0.0000
## 360 0.0058 nan 0.1000 -0.0000
## 380 0.0056 nan 0.1000 -0.0000
## 400 0.0054 nan 0.1000 -0.0000
## 420 0.0052 nan 0.1000 -0.0000
## 440 0.0051 nan 0.1000 -0.0000
## 460 0.0049 nan 0.1000 -0.0000
## 480 0.0047 nan 0.1000 -0.0000
## 500 0.0045 nan 0.1000 -0.0000
##
## - Fold1: shrinkage=0.1, interaction.depth= 3, n.minobsinnode=10, n.trees=500
## + Fold1: shrinkage=0.1, interaction.depth= 4, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1390 nan 0.1000 0.0196
## 2 0.1220 nan 0.1000 0.0157
## 3 0.1084 nan 0.1000 0.0134
## 4 0.0972 nan 0.1000 0.0104
## 5 0.0874 nan 0.1000 0.0091
## 6 0.0797 nan 0.1000 0.0074
## 7 0.0726 nan 0.1000 0.0065
## 8 0.0669 nan 0.1000 0.0055
## 9 0.0616 nan 0.1000 0.0046
## 10 0.0569 nan 0.1000 0.0039
## 20 0.0289 nan 0.1000 0.0014
## 40 0.0160 nan 0.1000 0.0002
## 60 0.0125 nan 0.1000 -0.0000
## 80 0.0111 nan 0.1000 -0.0001
## 100 0.0100 nan 0.1000 -0.0000
## 120 0.0091 nan 0.1000 -0.0000
## 140 0.0085 nan 0.1000 -0.0000
## 160 0.0079 nan 0.1000 0.0000
## 180 0.0074 nan 0.1000 -0.0000
## 200 0.0069 nan 0.1000 -0.0000
## 220 0.0065 nan 0.1000 -0.0000
## 240 0.0061 nan 0.1000 -0.0000
## 260 0.0058 nan 0.1000 -0.0000
## 280 0.0055 nan 0.1000 -0.0000
## 300 0.0052 nan 0.1000 -0.0000
## 320 0.0049 nan 0.1000 -0.0000
## 340 0.0047 nan 0.1000 -0.0000
## 360 0.0045 nan 0.1000 -0.0000
## 380 0.0043 nan 0.1000 -0.0000
## 400 0.0041 nan 0.1000 -0.0000
## 420 0.0039 nan 0.1000 -0.0000
## 440 0.0037 nan 0.1000 -0.0000
## 460 0.0036 nan 0.1000 -0.0000
## 480 0.0034 nan 0.1000 -0.0000
## 500 0.0033 nan 0.1000 -0.0000
##
## - Fold1: shrinkage=0.1, interaction.depth= 4, n.minobsinnode=10, n.trees=500
## + Fold1: shrinkage=0.1, interaction.depth= 5, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1379 nan 0.1000 0.0195
## 2 0.1211 nan 0.1000 0.0182
## 3 0.1075 nan 0.1000 0.0129
## 4 0.0957 nan 0.1000 0.0112
## 5 0.0860 nan 0.1000 0.0089
## 6 0.0775 nan 0.1000 0.0076
## 7 0.0705 nan 0.1000 0.0068
## 8 0.0637 nan 0.1000 0.0058
## 9 0.0584 nan 0.1000 0.0052
## 10 0.0536 nan 0.1000 0.0046
## 20 0.0268 nan 0.1000 0.0010
## 40 0.0143 nan 0.1000 0.0002
## 60 0.0114 nan 0.1000 -0.0000
## 80 0.0098 nan 0.1000 -0.0000
## 100 0.0088 nan 0.1000 -0.0001
## 120 0.0080 nan 0.1000 -0.0001
## 140 0.0072 nan 0.1000 -0.0000
## 160 0.0065 nan 0.1000 0.0000
## 180 0.0060 nan 0.1000 -0.0000
## 200 0.0056 nan 0.1000 -0.0000
## 220 0.0052 nan 0.1000 -0.0000
## 240 0.0049 nan 0.1000 -0.0000
## 260 0.0046 nan 0.1000 -0.0000
## 280 0.0043 nan 0.1000 -0.0000
## 300 0.0040 nan 0.1000 -0.0000
## 320 0.0038 nan 0.1000 -0.0000
## 340 0.0036 nan 0.1000 -0.0000
## 360 0.0034 nan 0.1000 -0.0000
## 380 0.0032 nan 0.1000 -0.0000
## 400 0.0030 nan 0.1000 -0.0000
## 420 0.0028 nan 0.1000 -0.0000
## 440 0.0027 nan 0.1000 -0.0000
## 460 0.0025 nan 0.1000 -0.0000
## 480 0.0024 nan 0.1000 -0.0000
## 500 0.0023 nan 0.1000 -0.0000
##
## - Fold1: shrinkage=0.1, interaction.depth= 5, n.minobsinnode=10, n.trees=500
## + Fold1: shrinkage=0.1, interaction.depth= 6, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1374 nan 0.1000 0.0203
## 2 0.1198 nan 0.1000 0.0179
## 3 0.1060 nan 0.1000 0.0136
## 4 0.0938 nan 0.1000 0.0103
## 5 0.0835 nan 0.1000 0.0096
## 6 0.0746 nan 0.1000 0.0082
## 7 0.0672 nan 0.1000 0.0065
## 8 0.0603 nan 0.1000 0.0061
## 9 0.0549 nan 0.1000 0.0055
## 10 0.0502 nan 0.1000 0.0040
## 20 0.0245 nan 0.1000 0.0011
## 40 0.0131 nan 0.1000 0.0002
## 60 0.0104 nan 0.1000 -0.0000
## 80 0.0090 nan 0.1000 -0.0000
## 100 0.0079 nan 0.1000 -0.0000
## 120 0.0071 nan 0.1000 -0.0000
## 140 0.0064 nan 0.1000 -0.0001
## 160 0.0058 nan 0.1000 -0.0001
## 180 0.0053 nan 0.1000 0.0000
## 200 0.0049 nan 0.1000 -0.0000
## 220 0.0045 nan 0.1000 -0.0000
## 240 0.0041 nan 0.1000 -0.0000
## 260 0.0038 nan 0.1000 -0.0000
## 280 0.0035 nan 0.1000 -0.0000
## 300 0.0032 nan 0.1000 -0.0000
## 320 0.0030 nan 0.1000 -0.0000
## 340 0.0028 nan 0.1000 -0.0000
## 360 0.0026 nan 0.1000 -0.0000
## 380 0.0025 nan 0.1000 -0.0000
## 400 0.0023 nan 0.1000 -0.0000
## 420 0.0022 nan 0.1000 -0.0000
## 440 0.0020 nan 0.1000 -0.0000
## 460 0.0019 nan 0.1000 -0.0000
## 480 0.0018 nan 0.1000 -0.0000
## 500 0.0017 nan 0.1000 -0.0000
##
## - Fold1: shrinkage=0.1, interaction.depth= 6, n.minobsinnode=10, n.trees=500
## + Fold1: shrinkage=0.1, interaction.depth= 7, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1370 nan 0.1000 0.0217
## 2 0.1196 nan 0.1000 0.0182
## 3 0.1045 nan 0.1000 0.0145
## 4 0.0925 nan 0.1000 0.0119
## 5 0.0823 nan 0.1000 0.0096
## 6 0.0733 nan 0.1000 0.0085
## 7 0.0656 nan 0.1000 0.0071
## 8 0.0598 nan 0.1000 0.0052
## 9 0.0542 nan 0.1000 0.0057
## 10 0.0490 nan 0.1000 0.0043
## 20 0.0232 nan 0.1000 0.0012
## 40 0.0127 nan 0.1000 0.0000
## 60 0.0099 nan 0.1000 -0.0000
## 80 0.0082 nan 0.1000 -0.0000
## 100 0.0071 nan 0.1000 -0.0000
## 120 0.0063 nan 0.1000 -0.0000
## 140 0.0056 nan 0.1000 -0.0000
## 160 0.0050 nan 0.1000 -0.0000
## 180 0.0045 nan 0.1000 -0.0000
## 200 0.0040 nan 0.1000 -0.0000
## 220 0.0037 nan 0.1000 -0.0000
## 240 0.0033 nan 0.1000 -0.0000
## 260 0.0030 nan 0.1000 -0.0000
## 280 0.0027 nan 0.1000 -0.0000
## 300 0.0025 nan 0.1000 -0.0000
## 320 0.0023 nan 0.1000 -0.0000
## 340 0.0021 nan 0.1000 -0.0000
## 360 0.0019 nan 0.1000 -0.0000
## 380 0.0018 nan 0.1000 -0.0000
## 400 0.0017 nan 0.1000 -0.0000
## 420 0.0015 nan 0.1000 -0.0000
## 440 0.0014 nan 0.1000 -0.0000
## 460 0.0013 nan 0.1000 -0.0000
## 480 0.0012 nan 0.1000 -0.0000
## 500 0.0011 nan 0.1000 -0.0000
##
## - Fold1: shrinkage=0.1, interaction.depth= 7, n.minobsinnode=10, n.trees=500
## + Fold1: shrinkage=0.1, interaction.depth= 8, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1363 nan 0.1000 0.0218
## 2 0.1184 nan 0.1000 0.0176
## 3 0.1032 nan 0.1000 0.0141
## 4 0.0914 nan 0.1000 0.0109
## 5 0.0808 nan 0.1000 0.0105
## 6 0.0714 nan 0.1000 0.0092
## 7 0.0634 nan 0.1000 0.0070
## 8 0.0568 nan 0.1000 0.0063
## 9 0.0513 nan 0.1000 0.0050
## 10 0.0466 nan 0.1000 0.0049
## 20 0.0214 nan 0.1000 0.0011
## 40 0.0118 nan 0.1000 0.0000
## 60 0.0093 nan 0.1000 -0.0000
## 80 0.0078 nan 0.1000 -0.0000
## 100 0.0068 nan 0.1000 -0.0000
## 120 0.0058 nan 0.1000 -0.0000
## 140 0.0050 nan 0.1000 -0.0000
## 160 0.0045 nan 0.1000 -0.0000
## 180 0.0040 nan 0.1000 -0.0000
## 200 0.0036 nan 0.1000 -0.0000
## 220 0.0032 nan 0.1000 -0.0000
## 240 0.0029 nan 0.1000 -0.0000
## 260 0.0026 nan 0.1000 -0.0000
## 280 0.0024 nan 0.1000 -0.0000
## 300 0.0022 nan 0.1000 -0.0000
## 320 0.0020 nan 0.1000 -0.0000
## 340 0.0018 nan 0.1000 -0.0000
## 360 0.0016 nan 0.1000 -0.0000
## 380 0.0015 nan 0.1000 -0.0000
## 400 0.0014 nan 0.1000 -0.0000
## 420 0.0012 nan 0.1000 -0.0000
## 440 0.0011 nan 0.1000 -0.0000
## 460 0.0010 nan 0.1000 -0.0000
## 480 0.0009 nan 0.1000 -0.0000
## 500 0.0009 nan 0.1000 -0.0000
##
## - Fold1: shrinkage=0.1, interaction.depth= 8, n.minobsinnode=10, n.trees=500
## + Fold1: shrinkage=0.1, interaction.depth= 9, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1365 nan 0.1000 0.0212
## 2 0.1188 nan 0.1000 0.0172
## 3 0.1028 nan 0.1000 0.0146
## 4 0.0897 nan 0.1000 0.0125
## 5 0.0791 nan 0.1000 0.0105
## 6 0.0702 nan 0.1000 0.0086
## 7 0.0625 nan 0.1000 0.0069
## 8 0.0562 nan 0.1000 0.0061
## 9 0.0502 nan 0.1000 0.0052
## 10 0.0449 nan 0.1000 0.0048
## 20 0.0208 nan 0.1000 0.0011
## 40 0.0107 nan 0.1000 0.0001
## 60 0.0082 nan 0.1000 -0.0000
## 80 0.0068 nan 0.1000 -0.0001
## 100 0.0058 nan 0.1000 -0.0000
## 120 0.0051 nan 0.1000 -0.0000
## 140 0.0044 nan 0.1000 -0.0000
## 160 0.0038 nan 0.1000 -0.0000
## 180 0.0034 nan 0.1000 -0.0000
## 200 0.0030 nan 0.1000 -0.0000
## 220 0.0026 nan 0.1000 -0.0000
## 240 0.0024 nan 0.1000 -0.0000
## 260 0.0021 nan 0.1000 -0.0000
## 280 0.0019 nan 0.1000 -0.0000
## 300 0.0017 nan 0.1000 -0.0000
## 320 0.0015 nan 0.1000 -0.0000
## 340 0.0013 nan 0.1000 -0.0000
## 360 0.0012 nan 0.1000 -0.0000
## 380 0.0011 nan 0.1000 -0.0000
## 400 0.0010 nan 0.1000 -0.0000
## 420 0.0009 nan 0.1000 -0.0000
## 440 0.0008 nan 0.1000 -0.0000
## 460 0.0007 nan 0.1000 -0.0000
## 480 0.0007 nan 0.1000 -0.0000
## 500 0.0006 nan 0.1000 -0.0000
##
## - Fold1: shrinkage=0.1, interaction.depth= 9, n.minobsinnode=10, n.trees=500
## + Fold1: shrinkage=0.1, interaction.depth=10, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1347 nan 0.1000 0.0216
## 2 0.1163 nan 0.1000 0.0177
## 3 0.1009 nan 0.1000 0.0145
## 4 0.0877 nan 0.1000 0.0112
## 5 0.0769 nan 0.1000 0.0098
## 6 0.0677 nan 0.1000 0.0082
## 7 0.0602 nan 0.1000 0.0073
## 8 0.0535 nan 0.1000 0.0059
## 9 0.0479 nan 0.1000 0.0048
## 10 0.0435 nan 0.1000 0.0039
## 20 0.0198 nan 0.1000 0.0008
## 40 0.0102 nan 0.1000 0.0000
## 60 0.0079 nan 0.1000 -0.0000
## 80 0.0064 nan 0.1000 -0.0001
## 100 0.0053 nan 0.1000 -0.0000
## 120 0.0045 nan 0.1000 -0.0001
## 140 0.0039 nan 0.1000 -0.0000
## 160 0.0034 nan 0.1000 -0.0000
## 180 0.0029 nan 0.1000 -0.0000
## 200 0.0026 nan 0.1000 -0.0000
## 220 0.0022 nan 0.1000 -0.0000
## 240 0.0019 nan 0.1000 -0.0000
## 260 0.0017 nan 0.1000 -0.0000
## 280 0.0015 nan 0.1000 -0.0000
## 300 0.0014 nan 0.1000 -0.0000
## 320 0.0012 nan 0.1000 -0.0000
## 340 0.0011 nan 0.1000 -0.0000
## 360 0.0010 nan 0.1000 -0.0000
## 380 0.0009 nan 0.1000 -0.0000
## 400 0.0008 nan 0.1000 -0.0000
## 420 0.0007 nan 0.1000 -0.0000
## 440 0.0006 nan 0.1000 -0.0000
## 460 0.0006 nan 0.1000 -0.0000
## 480 0.0005 nan 0.1000 -0.0000
## 500 0.0004 nan 0.1000 -0.0000
##
## - Fold1: shrinkage=0.1, interaction.depth=10, n.minobsinnode=10, n.trees=500
## + Fold2: shrinkage=0.1, interaction.depth= 1, n.minobsinnode=10, n.trees=500
## model fit failed for Fold2: shrinkage=0.1, interaction.depth= 1, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold2: shrinkage=0.1, interaction.depth= 1, n.minobsinnode=10, n.trees=500
## + Fold2: shrinkage=0.1, interaction.depth= 2, n.minobsinnode=10, n.trees=500
## model fit failed for Fold2: shrinkage=0.1, interaction.depth= 2, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold2: shrinkage=0.1, interaction.depth= 2, n.minobsinnode=10, n.trees=500
## + Fold2: shrinkage=0.1, interaction.depth= 3, n.minobsinnode=10, n.trees=500
## model fit failed for Fold2: shrinkage=0.1, interaction.depth= 3, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold2: shrinkage=0.1, interaction.depth= 3, n.minobsinnode=10, n.trees=500
## + Fold2: shrinkage=0.1, interaction.depth= 4, n.minobsinnode=10, n.trees=500
## model fit failed for Fold2: shrinkage=0.1, interaction.depth= 4, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold2: shrinkage=0.1, interaction.depth= 4, n.minobsinnode=10, n.trees=500
## + Fold2: shrinkage=0.1, interaction.depth= 5, n.minobsinnode=10, n.trees=500
## model fit failed for Fold2: shrinkage=0.1, interaction.depth= 5, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold2: shrinkage=0.1, interaction.depth= 5, n.minobsinnode=10, n.trees=500
## + Fold2: shrinkage=0.1, interaction.depth= 6, n.minobsinnode=10, n.trees=500
## model fit failed for Fold2: shrinkage=0.1, interaction.depth= 6, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold2: shrinkage=0.1, interaction.depth= 6, n.minobsinnode=10, n.trees=500
## + Fold2: shrinkage=0.1, interaction.depth= 7, n.minobsinnode=10, n.trees=500
## model fit failed for Fold2: shrinkage=0.1, interaction.depth= 7, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold2: shrinkage=0.1, interaction.depth= 7, n.minobsinnode=10, n.trees=500
## + Fold2: shrinkage=0.1, interaction.depth= 8, n.minobsinnode=10, n.trees=500
## model fit failed for Fold2: shrinkage=0.1, interaction.depth= 8, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold2: shrinkage=0.1, interaction.depth= 8, n.minobsinnode=10, n.trees=500
## + Fold2: shrinkage=0.1, interaction.depth= 9, n.minobsinnode=10, n.trees=500
## model fit failed for Fold2: shrinkage=0.1, interaction.depth= 9, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold2: shrinkage=0.1, interaction.depth= 9, n.minobsinnode=10, n.trees=500
## + Fold2: shrinkage=0.1, interaction.depth=10, n.minobsinnode=10, n.trees=500
## model fit failed for Fold2: shrinkage=0.1, interaction.depth=10, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold2: shrinkage=0.1, interaction.depth=10, n.minobsinnode=10, n.trees=500
## + Fold3: shrinkage=0.1, interaction.depth= 1, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1483 nan 0.1000 0.0141
## 2 0.1363 nan 0.1000 0.0115
## 3 0.1265 nan 0.1000 0.0091
## 4 0.1183 nan 0.1000 0.0080
## 5 0.1105 nan 0.1000 0.0078
## 6 0.1036 nan 0.1000 0.0060
## 7 0.0982 nan 0.1000 0.0047
## 8 0.0929 nan 0.1000 0.0051
## 9 0.0882 nan 0.1000 0.0043
## 10 0.0844 nan 0.1000 0.0039
## 20 0.0565 nan 0.1000 0.0017
## 40 0.0340 nan 0.1000 0.0007
## 60 0.0247 nan 0.1000 0.0002
## 80 0.0202 nan 0.1000 0.0000
## 100 0.0177 nan 0.1000 0.0000
## 120 0.0161 nan 0.1000 0.0000
## 140 0.0151 nan 0.1000 0.0000
## 160 0.0143 nan 0.1000 0.0000
## 180 0.0138 nan 0.1000 -0.0000
## 200 0.0133 nan 0.1000 -0.0000
## 220 0.0128 nan 0.1000 -0.0000
## 240 0.0126 nan 0.1000 -0.0000
## 260 0.0122 nan 0.1000 0.0000
## 280 0.0119 nan 0.1000 -0.0000
## 300 0.0116 nan 0.1000 0.0000
## 320 0.0114 nan 0.1000 -0.0000
## 340 0.0113 nan 0.1000 -0.0000
## 360 0.0111 nan 0.1000 -0.0000
## 380 0.0109 nan 0.1000 -0.0000
## 400 0.0108 nan 0.1000 -0.0000
## 420 0.0106 nan 0.1000 -0.0000
## 440 0.0105 nan 0.1000 -0.0000
## 460 0.0104 nan 0.1000 -0.0000
## 480 0.0103 nan 0.1000 -0.0000
## 500 0.0101 nan 0.1000 -0.0000
##
## - Fold3: shrinkage=0.1, interaction.depth= 1, n.minobsinnode=10, n.trees=500
## + Fold3: shrinkage=0.1, interaction.depth= 2, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1455 nan 0.1000 0.0168
## 2 0.1310 nan 0.1000 0.0137
## 3 0.1196 nan 0.1000 0.0107
## 4 0.1092 nan 0.1000 0.0110
## 5 0.1010 nan 0.1000 0.0073
## 6 0.0928 nan 0.1000 0.0080
## 7 0.0853 nan 0.1000 0.0068
## 8 0.0789 nan 0.1000 0.0061
## 9 0.0731 nan 0.1000 0.0053
## 10 0.0684 nan 0.1000 0.0041
## 20 0.0405 nan 0.1000 0.0013
## 40 0.0221 nan 0.1000 0.0003
## 60 0.0162 nan 0.1000 0.0001
## 80 0.0139 nan 0.1000 0.0001
## 100 0.0125 nan 0.1000 0.0001
## 120 0.0116 nan 0.1000 -0.0000
## 140 0.0108 nan 0.1000 -0.0000
## 160 0.0102 nan 0.1000 -0.0000
## 180 0.0098 nan 0.1000 -0.0001
## 200 0.0093 nan 0.1000 0.0000
## 220 0.0089 nan 0.1000 -0.0000
## 240 0.0086 nan 0.1000 -0.0000
## 260 0.0083 nan 0.1000 -0.0000
## 280 0.0080 nan 0.1000 -0.0000
## 300 0.0078 nan 0.1000 -0.0000
## 320 0.0075 nan 0.1000 -0.0000
## 340 0.0073 nan 0.1000 -0.0000
## 360 0.0071 nan 0.1000 -0.0000
## 380 0.0070 nan 0.1000 -0.0000
## 400 0.0068 nan 0.1000 -0.0000
## 420 0.0067 nan 0.1000 -0.0000
## 440 0.0065 nan 0.1000 -0.0000
## 460 0.0064 nan 0.1000 -0.0000
## 480 0.0062 nan 0.1000 -0.0000
## 500 0.0061 nan 0.1000 -0.0000
##
## - Fold3: shrinkage=0.1, interaction.depth= 2, n.minobsinnode=10, n.trees=500
## + Fold3: shrinkage=0.1, interaction.depth= 3, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1429 nan 0.1000 0.0192
## 2 0.1276 nan 0.1000 0.0135
## 3 0.1135 nan 0.1000 0.0142
## 4 0.1021 nan 0.1000 0.0108
## 5 0.0923 nan 0.1000 0.0096
## 6 0.0847 nan 0.1000 0.0076
## 7 0.0776 nan 0.1000 0.0069
## 8 0.0712 nan 0.1000 0.0059
## 9 0.0653 nan 0.1000 0.0052
## 10 0.0607 nan 0.1000 0.0045
## 20 0.0324 nan 0.1000 0.0009
## 40 0.0170 nan 0.1000 0.0002
## 60 0.0128 nan 0.1000 0.0001
## 80 0.0111 nan 0.1000 0.0000
## 100 0.0100 nan 0.1000 -0.0000
## 120 0.0092 nan 0.1000 -0.0000
## 140 0.0085 nan 0.1000 -0.0000
## 160 0.0080 nan 0.1000 0.0000
## 180 0.0076 nan 0.1000 -0.0000
## 200 0.0071 nan 0.1000 -0.0000
## 220 0.0067 nan 0.1000 -0.0000
## 240 0.0065 nan 0.1000 -0.0001
## 260 0.0062 nan 0.1000 -0.0000
## 280 0.0060 nan 0.1000 -0.0000
## 300 0.0058 nan 0.1000 -0.0000
## 320 0.0055 nan 0.1000 -0.0000
## 340 0.0053 nan 0.1000 -0.0000
## 360 0.0052 nan 0.1000 -0.0000
## 380 0.0050 nan 0.1000 -0.0000
## 400 0.0049 nan 0.1000 -0.0000
## 420 0.0047 nan 0.1000 -0.0000
## 440 0.0046 nan 0.1000 -0.0000
## 460 0.0044 nan 0.1000 -0.0000
## 480 0.0042 nan 0.1000 -0.0000
## 500 0.0041 nan 0.1000 -0.0000
##
## - Fold3: shrinkage=0.1, interaction.depth= 3, n.minobsinnode=10, n.trees=500
## + Fold3: shrinkage=0.1, interaction.depth= 4, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1429 nan 0.1000 0.0182
## 2 0.1261 nan 0.1000 0.0153
## 3 0.1119 nan 0.1000 0.0132
## 4 0.1004 nan 0.1000 0.0121
## 5 0.0902 nan 0.1000 0.0098
## 6 0.0813 nan 0.1000 0.0087
## 7 0.0738 nan 0.1000 0.0070
## 8 0.0675 nan 0.1000 0.0053
## 9 0.0617 nan 0.1000 0.0047
## 10 0.0566 nan 0.1000 0.0050
## 20 0.0288 nan 0.1000 0.0013
## 40 0.0146 nan 0.1000 0.0002
## 60 0.0111 nan 0.1000 -0.0000
## 80 0.0097 nan 0.1000 -0.0000
## 100 0.0087 nan 0.1000 -0.0000
## 120 0.0079 nan 0.1000 -0.0000
## 140 0.0071 nan 0.1000 -0.0000
## 160 0.0066 nan 0.1000 -0.0000
## 180 0.0062 nan 0.1000 -0.0000
## 200 0.0059 nan 0.1000 -0.0000
## 220 0.0055 nan 0.1000 -0.0000
## 240 0.0052 nan 0.1000 -0.0000
## 260 0.0049 nan 0.1000 -0.0000
## 280 0.0046 nan 0.1000 -0.0000
## 300 0.0044 nan 0.1000 -0.0000
## 320 0.0042 nan 0.1000 -0.0000
## 340 0.0040 nan 0.1000 -0.0000
## 360 0.0038 nan 0.1000 -0.0000
## 380 0.0036 nan 0.1000 -0.0000
## 400 0.0035 nan 0.1000 -0.0000
## 420 0.0033 nan 0.1000 -0.0000
## 440 0.0032 nan 0.1000 -0.0000
## 460 0.0030 nan 0.1000 -0.0000
## 480 0.0029 nan 0.1000 -0.0000
## 500 0.0028 nan 0.1000 -0.0000
##
## - Fold3: shrinkage=0.1, interaction.depth= 4, n.minobsinnode=10, n.trees=500
## + Fold3: shrinkage=0.1, interaction.depth= 5, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1405 nan 0.1000 0.0216
## 2 0.1246 nan 0.1000 0.0163
## 3 0.1100 nan 0.1000 0.0132
## 4 0.0975 nan 0.1000 0.0114
## 5 0.0871 nan 0.1000 0.0086
## 6 0.0776 nan 0.1000 0.0081
## 7 0.0696 nan 0.1000 0.0067
## 8 0.0621 nan 0.1000 0.0068
## 9 0.0557 nan 0.1000 0.0055
## 10 0.0508 nan 0.1000 0.0043
## 20 0.0248 nan 0.1000 0.0013
## 40 0.0124 nan 0.1000 0.0001
## 60 0.0098 nan 0.1000 -0.0000
## 80 0.0084 nan 0.1000 -0.0001
## 100 0.0075 nan 0.1000 0.0000
## 120 0.0067 nan 0.1000 -0.0000
## 140 0.0062 nan 0.1000 -0.0000
## 160 0.0057 nan 0.1000 -0.0000
## 180 0.0053 nan 0.1000 -0.0000
## 200 0.0049 nan 0.1000 -0.0000
## 220 0.0046 nan 0.1000 -0.0000
## 240 0.0043 nan 0.1000 -0.0000
## 260 0.0041 nan 0.1000 -0.0000
## 280 0.0039 nan 0.1000 -0.0000
## 300 0.0037 nan 0.1000 -0.0000
## 320 0.0035 nan 0.1000 -0.0000
## 340 0.0034 nan 0.1000 -0.0000
## 360 0.0032 nan 0.1000 -0.0000
## 380 0.0030 nan 0.1000 -0.0000
## 400 0.0028 nan 0.1000 -0.0000
## 420 0.0027 nan 0.1000 -0.0000
## 440 0.0025 nan 0.1000 -0.0000
## 460 0.0024 nan 0.1000 -0.0000
## 480 0.0023 nan 0.1000 -0.0000
## 500 0.0022 nan 0.1000 -0.0000
##
## - Fold3: shrinkage=0.1, interaction.depth= 5, n.minobsinnode=10, n.trees=500
## + Fold3: shrinkage=0.1, interaction.depth= 6, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1428 nan 0.1000 0.0208
## 2 0.1256 nan 0.1000 0.0170
## 3 0.1101 nan 0.1000 0.0152
## 4 0.0976 nan 0.1000 0.0118
## 5 0.0866 nan 0.1000 0.0112
## 6 0.0775 nan 0.1000 0.0089
## 7 0.0696 nan 0.1000 0.0075
## 8 0.0626 nan 0.1000 0.0064
## 9 0.0570 nan 0.1000 0.0053
## 10 0.0517 nan 0.1000 0.0048
## 20 0.0242 nan 0.1000 0.0013
## 40 0.0120 nan 0.1000 0.0001
## 60 0.0093 nan 0.1000 -0.0001
## 80 0.0078 nan 0.1000 -0.0000
## 100 0.0069 nan 0.1000 0.0000
## 120 0.0061 nan 0.1000 -0.0000
## 140 0.0056 nan 0.1000 -0.0000
## 160 0.0051 nan 0.1000 -0.0000
## 180 0.0047 nan 0.1000 -0.0000
## 200 0.0043 nan 0.1000 -0.0000
## 220 0.0040 nan 0.1000 -0.0000
## 240 0.0037 nan 0.1000 -0.0000
## 260 0.0034 nan 0.1000 -0.0000
## 280 0.0032 nan 0.1000 -0.0000
## 300 0.0030 nan 0.1000 -0.0000
## 320 0.0028 nan 0.1000 -0.0000
## 340 0.0026 nan 0.1000 -0.0000
## 360 0.0024 nan 0.1000 -0.0000
## 380 0.0023 nan 0.1000 -0.0000
## 400 0.0021 nan 0.1000 -0.0000
## 420 0.0020 nan 0.1000 -0.0000
## 440 0.0019 nan 0.1000 -0.0000
## 460 0.0018 nan 0.1000 -0.0000
## 480 0.0017 nan 0.1000 -0.0000
## 500 0.0016 nan 0.1000 -0.0000
##
## - Fold3: shrinkage=0.1, interaction.depth= 6, n.minobsinnode=10, n.trees=500
## + Fold3: shrinkage=0.1, interaction.depth= 7, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1405 nan 0.1000 0.0223
## 2 0.1217 nan 0.1000 0.0171
## 3 0.1060 nan 0.1000 0.0143
## 4 0.0925 nan 0.1000 0.0136
## 5 0.0819 nan 0.1000 0.0096
## 6 0.0726 nan 0.1000 0.0090
## 7 0.0643 nan 0.1000 0.0073
## 8 0.0573 nan 0.1000 0.0065
## 9 0.0518 nan 0.1000 0.0049
## 10 0.0468 nan 0.1000 0.0043
## 20 0.0217 nan 0.1000 0.0009
## 40 0.0108 nan 0.1000 0.0001
## 60 0.0083 nan 0.1000 -0.0001
## 80 0.0070 nan 0.1000 -0.0000
## 100 0.0062 nan 0.1000 -0.0000
## 120 0.0055 nan 0.1000 -0.0000
## 140 0.0049 nan 0.1000 -0.0000
## 160 0.0044 nan 0.1000 -0.0000
## 180 0.0040 nan 0.1000 -0.0000
## 200 0.0036 nan 0.1000 -0.0000
## 220 0.0033 nan 0.1000 -0.0000
## 240 0.0030 nan 0.1000 -0.0000
## 260 0.0028 nan 0.1000 -0.0000
## 280 0.0026 nan 0.1000 -0.0000
## 300 0.0024 nan 0.1000 -0.0000
## 320 0.0022 nan 0.1000 -0.0000
## 340 0.0021 nan 0.1000 -0.0000
## 360 0.0019 nan 0.1000 -0.0000
## 380 0.0017 nan 0.1000 -0.0000
## 400 0.0016 nan 0.1000 -0.0000
## 420 0.0015 nan 0.1000 -0.0000
## 440 0.0014 nan 0.1000 -0.0000
## 460 0.0013 nan 0.1000 -0.0000
## 480 0.0012 nan 0.1000 -0.0000
## 500 0.0011 nan 0.1000 -0.0000
##
## - Fold3: shrinkage=0.1, interaction.depth= 7, n.minobsinnode=10, n.trees=500
## + Fold3: shrinkage=0.1, interaction.depth= 8, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1398 nan 0.1000 0.0209
## 2 0.1221 nan 0.1000 0.0182
## 3 0.1070 nan 0.1000 0.0150
## 4 0.0935 nan 0.1000 0.0132
## 5 0.0822 nan 0.1000 0.0109
## 6 0.0728 nan 0.1000 0.0099
## 7 0.0648 nan 0.1000 0.0078
## 8 0.0577 nan 0.1000 0.0063
## 9 0.0522 nan 0.1000 0.0054
## 10 0.0469 nan 0.1000 0.0046
## 20 0.0207 nan 0.1000 0.0009
## 40 0.0106 nan 0.1000 0.0000
## 60 0.0081 nan 0.1000 -0.0001
## 80 0.0067 nan 0.1000 -0.0001
## 100 0.0058 nan 0.1000 -0.0000
## 120 0.0051 nan 0.1000 -0.0000
## 140 0.0045 nan 0.1000 -0.0000
## 160 0.0040 nan 0.1000 -0.0000
## 180 0.0036 nan 0.1000 -0.0000
## 200 0.0032 nan 0.1000 -0.0000
## 220 0.0029 nan 0.1000 -0.0000
## 240 0.0026 nan 0.1000 -0.0000
## 260 0.0024 nan 0.1000 -0.0000
## 280 0.0022 nan 0.1000 -0.0000
## 300 0.0020 nan 0.1000 -0.0000
## 320 0.0018 nan 0.1000 -0.0000
## 340 0.0016 nan 0.1000 -0.0000
## 360 0.0015 nan 0.1000 -0.0000
## 380 0.0014 nan 0.1000 -0.0000
## 400 0.0012 nan 0.1000 -0.0000
## 420 0.0011 nan 0.1000 -0.0000
## 440 0.0011 nan 0.1000 -0.0000
## 460 0.0010 nan 0.1000 -0.0000
## 480 0.0009 nan 0.1000 -0.0000
## 500 0.0008 nan 0.1000 -0.0000
##
## - Fold3: shrinkage=0.1, interaction.depth= 8, n.minobsinnode=10, n.trees=500
## + Fold3: shrinkage=0.1, interaction.depth= 9, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1389 nan 0.1000 0.0236
## 2 0.1203 nan 0.1000 0.0169
## 3 0.1048 nan 0.1000 0.0159
## 4 0.0912 nan 0.1000 0.0113
## 5 0.0800 nan 0.1000 0.0104
## 6 0.0704 nan 0.1000 0.0089
## 7 0.0619 nan 0.1000 0.0073
## 8 0.0555 nan 0.1000 0.0058
## 9 0.0497 nan 0.1000 0.0053
## 10 0.0449 nan 0.1000 0.0046
## 20 0.0192 nan 0.1000 0.0009
## 40 0.0100 nan 0.1000 0.0000
## 60 0.0075 nan 0.1000 -0.0000
## 80 0.0062 nan 0.1000 -0.0001
## 100 0.0052 nan 0.1000 -0.0000
## 120 0.0045 nan 0.1000 -0.0000
## 140 0.0039 nan 0.1000 -0.0000
## 160 0.0034 nan 0.1000 -0.0000
## 180 0.0030 nan 0.1000 -0.0000
## 200 0.0026 nan 0.1000 -0.0000
## 220 0.0023 nan 0.1000 -0.0000
## 240 0.0021 nan 0.1000 -0.0000
## 260 0.0019 nan 0.1000 -0.0000
## 280 0.0017 nan 0.1000 -0.0000
## 300 0.0015 nan 0.1000 -0.0000
## 320 0.0013 nan 0.1000 -0.0000
## 340 0.0012 nan 0.1000 -0.0000
## 360 0.0011 nan 0.1000 -0.0000
## 380 0.0010 nan 0.1000 -0.0000
## 400 0.0009 nan 0.1000 -0.0000
## 420 0.0008 nan 0.1000 -0.0000
## 440 0.0008 nan 0.1000 -0.0000
## 460 0.0007 nan 0.1000 -0.0000
## 480 0.0006 nan 0.1000 -0.0000
## 500 0.0006 nan 0.1000 -0.0000
##
## - Fold3: shrinkage=0.1, interaction.depth= 9, n.minobsinnode=10, n.trees=500
## + Fold3: shrinkage=0.1, interaction.depth=10, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1395 nan 0.1000 0.0228
## 2 0.1206 nan 0.1000 0.0171
## 3 0.1048 nan 0.1000 0.0154
## 4 0.0913 nan 0.1000 0.0116
## 5 0.0800 nan 0.1000 0.0106
## 6 0.0703 nan 0.1000 0.0095
## 7 0.0621 nan 0.1000 0.0079
## 8 0.0555 nan 0.1000 0.0065
## 9 0.0493 nan 0.1000 0.0059
## 10 0.0442 nan 0.1000 0.0046
## 20 0.0188 nan 0.1000 0.0010
## 40 0.0092 nan 0.1000 0.0001
## 60 0.0070 nan 0.1000 -0.0000
## 80 0.0057 nan 0.1000 -0.0000
## 100 0.0049 nan 0.1000 -0.0000
## 120 0.0042 nan 0.1000 -0.0001
## 140 0.0037 nan 0.1000 -0.0000
## 160 0.0032 nan 0.1000 -0.0000
## 180 0.0028 nan 0.1000 -0.0000
## 200 0.0024 nan 0.1000 -0.0000
## 220 0.0021 nan 0.1000 -0.0000
## 240 0.0019 nan 0.1000 -0.0000
## 260 0.0017 nan 0.1000 -0.0000
## 280 0.0015 nan 0.1000 -0.0000
## 300 0.0013 nan 0.1000 -0.0000
## 320 0.0012 nan 0.1000 -0.0000
## 340 0.0010 nan 0.1000 -0.0000
## 360 0.0009 nan 0.1000 -0.0000
## 380 0.0008 nan 0.1000 -0.0000
## 400 0.0007 nan 0.1000 -0.0000
## 420 0.0007 nan 0.1000 -0.0000
## 440 0.0006 nan 0.1000 -0.0000
## 460 0.0006 nan 0.1000 -0.0000
## 480 0.0005 nan 0.1000 -0.0000
## 500 0.0005 nan 0.1000 -0.0000
##
## - Fold3: shrinkage=0.1, interaction.depth=10, n.minobsinnode=10, n.trees=500
## + Fold4: shrinkage=0.1, interaction.depth= 1, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1470 nan 0.1000 0.0144
## 2 0.1352 nan 0.1000 0.0113
## 3 0.1258 nan 0.1000 0.0085
## 4 0.1168 nan 0.1000 0.0090
## 5 0.1092 nan 0.1000 0.0071
## 6 0.1028 nan 0.1000 0.0061
## 7 0.0971 nan 0.1000 0.0052
## 8 0.0925 nan 0.1000 0.0045
## 9 0.0881 nan 0.1000 0.0042
## 10 0.0841 nan 0.1000 0.0042
## 20 0.0567 nan 0.1000 0.0014
## 40 0.0339 nan 0.1000 0.0005
## 60 0.0249 nan 0.1000 0.0002
## 80 0.0208 nan 0.1000 0.0001
## 100 0.0183 nan 0.1000 0.0001
## 120 0.0167 nan 0.1000 0.0000
## 140 0.0159 nan 0.1000 -0.0000
## 160 0.0152 nan 0.1000 -0.0000
## 180 0.0147 nan 0.1000 -0.0000
## 200 0.0142 nan 0.1000 -0.0000
## 220 0.0139 nan 0.1000 -0.0000
## 240 0.0136 nan 0.1000 -0.0000
## 260 0.0133 nan 0.1000 -0.0000
## 280 0.0130 nan 0.1000 -0.0000
## 300 0.0127 nan 0.1000 -0.0000
## 320 0.0125 nan 0.1000 -0.0000
## 340 0.0123 nan 0.1000 -0.0000
## 360 0.0121 nan 0.1000 -0.0000
## 380 0.0120 nan 0.1000 -0.0000
## 400 0.0118 nan 0.1000 -0.0000
## 420 0.0117 nan 0.1000 -0.0000
## 440 0.0116 nan 0.1000 -0.0000
## 460 0.0114 nan 0.1000 -0.0000
## 480 0.0113 nan 0.1000 -0.0000
## 500 0.0112 nan 0.1000 -0.0000
##
## - Fold4: shrinkage=0.1, interaction.depth= 1, n.minobsinnode=10, n.trees=500
## + Fold4: shrinkage=0.1, interaction.depth= 2, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1440 nan 0.1000 0.0163
## 2 0.1300 nan 0.1000 0.0139
## 3 0.1190 nan 0.1000 0.0109
## 4 0.1079 nan 0.1000 0.0104
## 5 0.0996 nan 0.1000 0.0081
## 6 0.0917 nan 0.1000 0.0081
## 7 0.0852 nan 0.1000 0.0061
## 8 0.0790 nan 0.1000 0.0055
## 9 0.0736 nan 0.1000 0.0048
## 10 0.0686 nan 0.1000 0.0049
## 20 0.0407 nan 0.1000 0.0018
## 40 0.0225 nan 0.1000 0.0004
## 60 0.0169 nan 0.1000 0.0001
## 80 0.0146 nan 0.1000 -0.0000
## 100 0.0135 nan 0.1000 -0.0000
## 120 0.0128 nan 0.1000 0.0000
## 140 0.0121 nan 0.1000 -0.0000
## 160 0.0115 nan 0.1000 -0.0000
## 180 0.0111 nan 0.1000 -0.0000
## 200 0.0107 nan 0.1000 -0.0000
## 220 0.0103 nan 0.1000 -0.0000
## 240 0.0099 nan 0.1000 -0.0000
## 260 0.0096 nan 0.1000 -0.0000
## 280 0.0093 nan 0.1000 -0.0000
## 300 0.0090 nan 0.1000 -0.0000
## 320 0.0088 nan 0.1000 -0.0000
## 340 0.0085 nan 0.1000 -0.0000
## 360 0.0083 nan 0.1000 -0.0000
## 380 0.0081 nan 0.1000 -0.0000
## 400 0.0079 nan 0.1000 -0.0000
## 420 0.0077 nan 0.1000 -0.0000
## 440 0.0075 nan 0.1000 -0.0000
## 460 0.0074 nan 0.1000 -0.0000
## 480 0.0072 nan 0.1000 -0.0000
## 500 0.0071 nan 0.1000 -0.0000
##
## - Fold4: shrinkage=0.1, interaction.depth= 2, n.minobsinnode=10, n.trees=500
## + Fold4: shrinkage=0.1, interaction.depth= 3, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1419 nan 0.1000 0.0193
## 2 0.1256 nan 0.1000 0.0153
## 3 0.1132 nan 0.1000 0.0113
## 4 0.1023 nan 0.1000 0.0099
## 5 0.0925 nan 0.1000 0.0095
## 6 0.0841 nan 0.1000 0.0075
## 7 0.0769 nan 0.1000 0.0069
## 8 0.0708 nan 0.1000 0.0055
## 9 0.0652 nan 0.1000 0.0041
## 10 0.0607 nan 0.1000 0.0045
## 20 0.0329 nan 0.1000 0.0013
## 40 0.0182 nan 0.1000 0.0002
## 60 0.0143 nan 0.1000 0.0000
## 80 0.0128 nan 0.1000 0.0000
## 100 0.0117 nan 0.1000 -0.0000
## 120 0.0109 nan 0.1000 -0.0000
## 140 0.0103 nan 0.1000 -0.0000
## 160 0.0097 nan 0.1000 -0.0000
## 180 0.0091 nan 0.1000 -0.0000
## 200 0.0087 nan 0.1000 -0.0000
## 220 0.0083 nan 0.1000 -0.0000
## 240 0.0079 nan 0.1000 -0.0001
## 260 0.0077 nan 0.1000 -0.0000
## 280 0.0074 nan 0.1000 -0.0000
## 300 0.0070 nan 0.1000 -0.0000
## 320 0.0068 nan 0.1000 -0.0000
## 340 0.0065 nan 0.1000 -0.0000
## 360 0.0062 nan 0.1000 -0.0000
## 380 0.0059 nan 0.1000 -0.0000
## 400 0.0058 nan 0.1000 -0.0000
## 420 0.0056 nan 0.1000 -0.0000
## 440 0.0054 nan 0.1000 -0.0000
## 460 0.0053 nan 0.1000 -0.0000
## 480 0.0051 nan 0.1000 -0.0000
## 500 0.0050 nan 0.1000 -0.0000
##
## - Fold4: shrinkage=0.1, interaction.depth= 3, n.minobsinnode=10, n.trees=500
## + Fold4: shrinkage=0.1, interaction.depth= 4, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1402 nan 0.1000 0.0195
## 2 0.1241 nan 0.1000 0.0157
## 3 0.1103 nan 0.1000 0.0127
## 4 0.0988 nan 0.1000 0.0117
## 5 0.0892 nan 0.1000 0.0093
## 6 0.0801 nan 0.1000 0.0083
## 7 0.0727 nan 0.1000 0.0067
## 8 0.0662 nan 0.1000 0.0056
## 9 0.0600 nan 0.1000 0.0062
## 10 0.0555 nan 0.1000 0.0041
## 20 0.0289 nan 0.1000 0.0014
## 40 0.0158 nan 0.1000 0.0002
## 60 0.0125 nan 0.1000 -0.0000
## 80 0.0111 nan 0.1000 -0.0001
## 100 0.0100 nan 0.1000 -0.0000
## 120 0.0092 nan 0.1000 -0.0000
## 140 0.0086 nan 0.1000 -0.0000
## 160 0.0080 nan 0.1000 -0.0000
## 180 0.0074 nan 0.1000 -0.0000
## 200 0.0070 nan 0.1000 -0.0000
## 220 0.0066 nan 0.1000 -0.0000
## 240 0.0062 nan 0.1000 -0.0000
## 260 0.0059 nan 0.1000 -0.0000
## 280 0.0056 nan 0.1000 -0.0000
## 300 0.0053 nan 0.1000 -0.0000
## 320 0.0051 nan 0.1000 -0.0000
## 340 0.0048 nan 0.1000 -0.0000
## 360 0.0046 nan 0.1000 -0.0000
## 380 0.0044 nan 0.1000 -0.0000
## 400 0.0042 nan 0.1000 -0.0000
## 420 0.0040 nan 0.1000 -0.0000
## 440 0.0039 nan 0.1000 -0.0000
## 460 0.0037 nan 0.1000 -0.0000
## 480 0.0035 nan 0.1000 -0.0000
## 500 0.0034 nan 0.1000 -0.0000
##
## - Fold4: shrinkage=0.1, interaction.depth= 4, n.minobsinnode=10, n.trees=500
## + Fold4: shrinkage=0.1, interaction.depth= 5, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1410 nan 0.1000 0.0201
## 2 0.1233 nan 0.1000 0.0166
## 3 0.1090 nan 0.1000 0.0135
## 4 0.0969 nan 0.1000 0.0108
## 5 0.0868 nan 0.1000 0.0100
## 6 0.0779 nan 0.1000 0.0084
## 7 0.0701 nan 0.1000 0.0072
## 8 0.0632 nan 0.1000 0.0064
## 9 0.0578 nan 0.1000 0.0053
## 10 0.0527 nan 0.1000 0.0041
## 20 0.0259 nan 0.1000 0.0011
## 40 0.0141 nan 0.1000 0.0002
## 60 0.0113 nan 0.1000 -0.0000
## 80 0.0099 nan 0.1000 -0.0000
## 100 0.0089 nan 0.1000 -0.0001
## 120 0.0080 nan 0.1000 -0.0000
## 140 0.0074 nan 0.1000 -0.0000
## 160 0.0068 nan 0.1000 -0.0000
## 180 0.0064 nan 0.1000 -0.0000
## 200 0.0059 nan 0.1000 -0.0000
## 220 0.0055 nan 0.1000 -0.0000
## 240 0.0052 nan 0.1000 -0.0000
## 260 0.0049 nan 0.1000 -0.0000
## 280 0.0046 nan 0.1000 -0.0000
## 300 0.0043 nan 0.1000 -0.0000
## 320 0.0041 nan 0.1000 -0.0000
## 340 0.0039 nan 0.1000 -0.0000
## 360 0.0037 nan 0.1000 -0.0000
## 380 0.0034 nan 0.1000 -0.0000
## 400 0.0033 nan 0.1000 -0.0000
## 420 0.0031 nan 0.1000 -0.0000
## 440 0.0029 nan 0.1000 -0.0000
## 460 0.0028 nan 0.1000 -0.0000
## 480 0.0026 nan 0.1000 -0.0000
## 500 0.0025 nan 0.1000 -0.0000
##
## - Fold4: shrinkage=0.1, interaction.depth= 5, n.minobsinnode=10, n.trees=500
## + Fold4: shrinkage=0.1, interaction.depth= 6, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1398 nan 0.1000 0.0213
## 2 0.1224 nan 0.1000 0.0169
## 3 0.1071 nan 0.1000 0.0143
## 4 0.0947 nan 0.1000 0.0115
## 5 0.0840 nan 0.1000 0.0104
## 6 0.0752 nan 0.1000 0.0076
## 7 0.0677 nan 0.1000 0.0076
## 8 0.0610 nan 0.1000 0.0063
## 9 0.0556 nan 0.1000 0.0048
## 10 0.0503 nan 0.1000 0.0048
## 20 0.0245 nan 0.1000 0.0010
## 40 0.0135 nan 0.1000 0.0001
## 60 0.0110 nan 0.1000 -0.0001
## 80 0.0095 nan 0.1000 -0.0000
## 100 0.0084 nan 0.1000 -0.0000
## 120 0.0074 nan 0.1000 -0.0001
## 140 0.0068 nan 0.1000 -0.0000
## 160 0.0062 nan 0.1000 -0.0000
## 180 0.0057 nan 0.1000 -0.0000
## 200 0.0052 nan 0.1000 -0.0000
## 220 0.0048 nan 0.1000 -0.0000
## 240 0.0044 nan 0.1000 -0.0000
## 260 0.0040 nan 0.1000 -0.0000
## 280 0.0037 nan 0.1000 -0.0000
## 300 0.0035 nan 0.1000 -0.0000
## 320 0.0032 nan 0.1000 -0.0000
## 340 0.0030 nan 0.1000 -0.0000
## 360 0.0028 nan 0.1000 -0.0000
## 380 0.0026 nan 0.1000 -0.0000
## 400 0.0024 nan 0.1000 -0.0000
## 420 0.0023 nan 0.1000 -0.0000
## 440 0.0022 nan 0.1000 -0.0000
## 460 0.0020 nan 0.1000 -0.0000
## 480 0.0019 nan 0.1000 -0.0000
## 500 0.0018 nan 0.1000 -0.0000
##
## - Fold4: shrinkage=0.1, interaction.depth= 6, n.minobsinnode=10, n.trees=500
## + Fold4: shrinkage=0.1, interaction.depth= 7, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1392 nan 0.1000 0.0217
## 2 0.1210 nan 0.1000 0.0168
## 3 0.1055 nan 0.1000 0.0148
## 4 0.0919 nan 0.1000 0.0130
## 5 0.0814 nan 0.1000 0.0101
## 6 0.0719 nan 0.1000 0.0078
## 7 0.0644 nan 0.1000 0.0065
## 8 0.0579 nan 0.1000 0.0054
## 9 0.0523 nan 0.1000 0.0052
## 10 0.0476 nan 0.1000 0.0042
## 20 0.0226 nan 0.1000 0.0009
## 40 0.0125 nan 0.1000 0.0001
## 60 0.0100 nan 0.1000 -0.0001
## 80 0.0085 nan 0.1000 -0.0000
## 100 0.0073 nan 0.1000 -0.0000
## 120 0.0063 nan 0.1000 -0.0000
## 140 0.0057 nan 0.1000 -0.0000
## 160 0.0051 nan 0.1000 -0.0000
## 180 0.0046 nan 0.1000 -0.0000
## 200 0.0041 nan 0.1000 -0.0000
## 220 0.0037 nan 0.1000 -0.0000
## 240 0.0034 nan 0.1000 -0.0000
## 260 0.0031 nan 0.1000 -0.0000
## 280 0.0028 nan 0.1000 -0.0000
## 300 0.0026 nan 0.1000 -0.0000
## 320 0.0024 nan 0.1000 -0.0000
## 340 0.0022 nan 0.1000 -0.0000
## 360 0.0020 nan 0.1000 -0.0000
## 380 0.0018 nan 0.1000 -0.0000
## 400 0.0017 nan 0.1000 -0.0000
## 420 0.0016 nan 0.1000 -0.0000
## 440 0.0015 nan 0.1000 -0.0000
## 460 0.0014 nan 0.1000 -0.0000
## 480 0.0013 nan 0.1000 -0.0000
## 500 0.0012 nan 0.1000 -0.0000
##
## - Fold4: shrinkage=0.1, interaction.depth= 7, n.minobsinnode=10, n.trees=500
## + Fold4: shrinkage=0.1, interaction.depth= 8, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1386 nan 0.1000 0.0216
## 2 0.1200 nan 0.1000 0.0172
## 3 0.1051 nan 0.1000 0.0140
## 4 0.0924 nan 0.1000 0.0101
## 5 0.0814 nan 0.1000 0.0104
## 6 0.0719 nan 0.1000 0.0086
## 7 0.0645 nan 0.1000 0.0068
## 8 0.0576 nan 0.1000 0.0065
## 9 0.0516 nan 0.1000 0.0060
## 10 0.0466 nan 0.1000 0.0050
## 20 0.0220 nan 0.1000 0.0011
## 40 0.0117 nan 0.1000 0.0000
## 60 0.0091 nan 0.1000 0.0000
## 80 0.0076 nan 0.1000 -0.0000
## 100 0.0065 nan 0.1000 -0.0000
## 120 0.0057 nan 0.1000 -0.0000
## 140 0.0051 nan 0.1000 -0.0000
## 160 0.0046 nan 0.1000 -0.0000
## 180 0.0041 nan 0.1000 -0.0000
## 200 0.0036 nan 0.1000 -0.0000
## 220 0.0033 nan 0.1000 -0.0000
## 240 0.0030 nan 0.1000 -0.0000
## 260 0.0027 nan 0.1000 -0.0000
## 280 0.0024 nan 0.1000 -0.0000
## 300 0.0022 nan 0.1000 -0.0000
## 320 0.0020 nan 0.1000 -0.0000
## 340 0.0019 nan 0.1000 -0.0000
## 360 0.0017 nan 0.1000 -0.0000
## 380 0.0016 nan 0.1000 -0.0000
## 400 0.0014 nan 0.1000 -0.0000
## 420 0.0013 nan 0.1000 -0.0000
## 440 0.0012 nan 0.1000 -0.0000
## 460 0.0011 nan 0.1000 -0.0000
## 480 0.0010 nan 0.1000 -0.0000
## 500 0.0009 nan 0.1000 -0.0000
##
## - Fold4: shrinkage=0.1, interaction.depth= 8, n.minobsinnode=10, n.trees=500
## + Fold4: shrinkage=0.1, interaction.depth= 9, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1373 nan 0.1000 0.0236
## 2 0.1193 nan 0.1000 0.0181
## 3 0.1040 nan 0.1000 0.0157
## 4 0.0903 nan 0.1000 0.0125
## 5 0.0792 nan 0.1000 0.0095
## 6 0.0699 nan 0.1000 0.0089
## 7 0.0624 nan 0.1000 0.0064
## 8 0.0557 nan 0.1000 0.0064
## 9 0.0500 nan 0.1000 0.0046
## 10 0.0447 nan 0.1000 0.0050
## 20 0.0204 nan 0.1000 0.0009
## 40 0.0107 nan 0.1000 -0.0000
## 60 0.0083 nan 0.1000 -0.0001
## 80 0.0068 nan 0.1000 -0.0000
## 100 0.0059 nan 0.1000 -0.0000
## 120 0.0051 nan 0.1000 -0.0000
## 140 0.0044 nan 0.1000 -0.0000
## 160 0.0039 nan 0.1000 -0.0000
## 180 0.0034 nan 0.1000 -0.0000
## 200 0.0030 nan 0.1000 -0.0000
## 220 0.0027 nan 0.1000 -0.0000
## 240 0.0024 nan 0.1000 -0.0000
## 260 0.0021 nan 0.1000 -0.0000
## 280 0.0019 nan 0.1000 -0.0000
## 300 0.0017 nan 0.1000 -0.0000
## 320 0.0016 nan 0.1000 -0.0000
## 340 0.0014 nan 0.1000 -0.0000
## 360 0.0013 nan 0.1000 -0.0000
## 380 0.0012 nan 0.1000 -0.0000
## 400 0.0011 nan 0.1000 -0.0000
## 420 0.0010 nan 0.1000 -0.0000
## 440 0.0009 nan 0.1000 -0.0000
## 460 0.0008 nan 0.1000 -0.0000
## 480 0.0007 nan 0.1000 -0.0000
## 500 0.0007 nan 0.1000 -0.0000
##
## - Fold4: shrinkage=0.1, interaction.depth= 9, n.minobsinnode=10, n.trees=500
## + Fold4: shrinkage=0.1, interaction.depth=10, n.minobsinnode=10, n.trees=500
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1379 nan 0.1000 0.0233
## 2 0.1189 nan 0.1000 0.0185
## 3 0.1041 nan 0.1000 0.0151
## 4 0.0912 nan 0.1000 0.0136
## 5 0.0804 nan 0.1000 0.0095
## 6 0.0706 nan 0.1000 0.0091
## 7 0.0624 nan 0.1000 0.0077
## 8 0.0555 nan 0.1000 0.0068
## 9 0.0497 nan 0.1000 0.0053
## 10 0.0450 nan 0.1000 0.0043
## 20 0.0203 nan 0.1000 0.0013
## 40 0.0106 nan 0.1000 0.0000
## 60 0.0079 nan 0.1000 -0.0001
## 80 0.0066 nan 0.1000 -0.0000
## 100 0.0054 nan 0.1000 -0.0000
## 120 0.0046 nan 0.1000 -0.0000
## 140 0.0039 nan 0.1000 -0.0000
## 160 0.0034 nan 0.1000 -0.0000
## 180 0.0030 nan 0.1000 -0.0000
## 200 0.0026 nan 0.1000 -0.0000
## 220 0.0023 nan 0.1000 -0.0000
## 240 0.0021 nan 0.1000 -0.0000
## 260 0.0018 nan 0.1000 -0.0000
## 280 0.0016 nan 0.1000 -0.0000
## 300 0.0014 nan 0.1000 -0.0000
## 320 0.0013 nan 0.1000 -0.0000
## 340 0.0011 nan 0.1000 -0.0000
## 360 0.0010 nan 0.1000 -0.0000
## 380 0.0009 nan 0.1000 -0.0000
## 400 0.0008 nan 0.1000 -0.0000
## 420 0.0007 nan 0.1000 -0.0000
## 440 0.0007 nan 0.1000 -0.0000
## 460 0.0006 nan 0.1000 -0.0000
## 480 0.0005 nan 0.1000 -0.0000
## 500 0.0005 nan 0.1000 -0.0000
##
## - Fold4: shrinkage=0.1, interaction.depth=10, n.minobsinnode=10, n.trees=500
## + Fold5: shrinkage=0.1, interaction.depth= 1, n.minobsinnode=10, n.trees=500
## model fit failed for Fold5: shrinkage=0.1, interaction.depth= 1, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold5: shrinkage=0.1, interaction.depth= 1, n.minobsinnode=10, n.trees=500
## + Fold5: shrinkage=0.1, interaction.depth= 2, n.minobsinnode=10, n.trees=500
## model fit failed for Fold5: shrinkage=0.1, interaction.depth= 2, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold5: shrinkage=0.1, interaction.depth= 2, n.minobsinnode=10, n.trees=500
## + Fold5: shrinkage=0.1, interaction.depth= 3, n.minobsinnode=10, n.trees=500
## model fit failed for Fold5: shrinkage=0.1, interaction.depth= 3, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold5: shrinkage=0.1, interaction.depth= 3, n.minobsinnode=10, n.trees=500
## + Fold5: shrinkage=0.1, interaction.depth= 4, n.minobsinnode=10, n.trees=500
## model fit failed for Fold5: shrinkage=0.1, interaction.depth= 4, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold5: shrinkage=0.1, interaction.depth= 4, n.minobsinnode=10, n.trees=500
## + Fold5: shrinkage=0.1, interaction.depth= 5, n.minobsinnode=10, n.trees=500
## model fit failed for Fold5: shrinkage=0.1, interaction.depth= 5, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold5: shrinkage=0.1, interaction.depth= 5, n.minobsinnode=10, n.trees=500
## + Fold5: shrinkage=0.1, interaction.depth= 6, n.minobsinnode=10, n.trees=500
## model fit failed for Fold5: shrinkage=0.1, interaction.depth= 6, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold5: shrinkage=0.1, interaction.depth= 6, n.minobsinnode=10, n.trees=500
## + Fold5: shrinkage=0.1, interaction.depth= 7, n.minobsinnode=10, n.trees=500
## model fit failed for Fold5: shrinkage=0.1, interaction.depth= 7, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold5: shrinkage=0.1, interaction.depth= 7, n.minobsinnode=10, n.trees=500
## + Fold5: shrinkage=0.1, interaction.depth= 8, n.minobsinnode=10, n.trees=500
## model fit failed for Fold5: shrinkage=0.1, interaction.depth= 8, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold5: shrinkage=0.1, interaction.depth= 8, n.minobsinnode=10, n.trees=500
## + Fold5: shrinkage=0.1, interaction.depth= 9, n.minobsinnode=10, n.trees=500
## model fit failed for Fold5: shrinkage=0.1, interaction.depth= 9, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold5: shrinkage=0.1, interaction.depth= 9, n.minobsinnode=10, n.trees=500
## + Fold5: shrinkage=0.1, interaction.depth=10, n.minobsinnode=10, n.trees=500
## model fit failed for Fold5: shrinkage=0.1, interaction.depth=10, n.minobsinnode=10, n.trees=500 Error in checkMissing(x, y) :
## Use NA for missing values. NaN found in predictor variables:LandSlope
##
## - Fold5: shrinkage=0.1, interaction.depth=10, n.minobsinnode=10, n.trees=500
## Aggregating results
## Selecting tuning parameters
## Fitting n.trees = 200, interaction.depth = 7, shrinkage = 0.1, n.minobsinnode = 10 on full training set
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 0.1389 nan 0.1000 0.0215
## 2 0.1208 nan 0.1000 0.0182
## 3 0.1060 nan 0.1000 0.0139
## 4 0.0935 nan 0.1000 0.0108
## 5 0.0831 nan 0.1000 0.0102
## 6 0.0745 nan 0.1000 0.0080
## 7 0.0668 nan 0.1000 0.0065
## 8 0.0602 nan 0.1000 0.0066
## 9 0.0546 nan 0.1000 0.0049
## 10 0.0496 nan 0.1000 0.0047
## 20 0.0238 nan 0.1000 0.0011
## 40 0.0124 nan 0.1000 0.0001
## 60 0.0098 nan 0.1000 -0.0000
## 80 0.0084 nan 0.1000 -0.0000
## 100 0.0074 nan 0.1000 -0.0000
## 120 0.0067 nan 0.1000 -0.0000
## 140 0.0060 nan 0.1000 -0.0000
## 160 0.0055 nan 0.1000 -0.0000
## 180 0.0050 nan 0.1000 -0.0000
## 200 0.0046 nan 0.1000 -0.0000
##
## track experiment with mlflow
## generating output
# Inspect variable importance from the caret-tuned GBM model.
# NOTE(review): several CV folds above (Fold2, Fold5) failed with
# "NaN found in predictor variables: LandSlope", so resampling results —
# and these importances — are based only on the folds that succeeded.
# The NaN in LandSlope should be fixed upstream before trusting this ranking.
varImp(gbm)
## gbm variable importance
##
## only 20 most important variables shown (out of 64)
##
## Overall
## OverallQual 100.000
## Neighborhood 38.220
## 1stFlrSF 17.702
## GarageArea 14.430
## KitchenQual 12.321
## 2ndFlrSF 9.693
## BsmtFinSF1 9.091
## Fireplaces 8.061
## LotArea 7.657
## TotRmsAbvGrd 6.509
## YearRemodAdd 5.176
## FullBath 4.784
## OverallCond 4.327
## CentralAir 3.478
## GarageFinish 3.087
## MSSubClass 2.965
## GarageYrBlt 2.499
## BsmtUnfSF 2.384
## GarageType 2.198
## ExterQual 1.505
# Score the held-out test set with the tuned GBM and build the Kaggle
# submission file (Id, SalePrice).
# The model was trained on log-transformed SalePrice, so predictions are
# back-transformed with exp() before export.
submission <- test %>%
  daml_predict(model = gbm, pred_field = "SalePrice") %>%
  select(Id, SalePrice) %>%
  mutate(SalePrice = exp(SalePrice))

# Kaggle expects exactly two columns and no row-name column.
# Use FALSE, not the reassignable shorthand F.
write.csv(submission, "gbm_baseline.csv", row.names = FALSE)
Extraordinary — our submission scored 0.12577 on the leaderboard!
Now let's take a look at other ideas, such as feature interactions, binning, PCA, ensemble models, and dummy features for imputed records.